From ff49ea854a154808046d031bca6f473069d3e760 Mon Sep 17 00:00:00 2001 From: Felicitas Pojtinger Date: Tue, 3 Dec 2024 21:51:24 -0800 Subject: [PATCH 01/78] refactor: Disable TSC scaling for PVM Signed-off-by: Felicitas Pojtinger --- src/vmm/src/vstate/vcpu/x86_64.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/vmm/src/vstate/vcpu/x86_64.rs b/src/vmm/src/vstate/vcpu/x86_64.rs index a1bb22d1bb7..bb581d1ca83 100644 --- a/src/vmm/src/vstate/vcpu/x86_64.rs +++ b/src/vmm/src/vstate/vcpu/x86_64.rs @@ -561,8 +561,12 @@ impl KvmVcpu { } /// Scale the TSC frequency of this vCPU to the one provided as a parameter. - pub fn set_tsc_khz(&self, tsc_freq: u32) -> Result<(), SetTscError> { - self.fd.set_tsc_khz(tsc_freq).map_err(SetTscError) + pub fn set_tsc_khz(&self, _: u32) -> Result<(), SetTscError> { + // Disable TSC scaling when using PVM because it is unsupported on most virtualized platforms. + // Even on supported platforms like virtualized AMD CPUs, enabling TSC scaling can cause VM freezes + // after resuming from a snapshot. + // For more details, see https://github.com/virt-pvm/linux/issues/12#issue-2515360332 + Ok(()) } /// Use provided state to populate KVM internal state. 
From 32826d359f33f7d2b4d7240b32e4850d55599ebe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:15:17 +0000 Subject: [PATCH 02/78] build(deps): Bump the firecracker group with 10 updates Bumps the firecracker group with 10 updates: | Package | From | To | | --- | --- | --- | | [thiserror](https://github.com/dtolnay/thiserror) | `2.0.7` | `2.0.9` | | [syn](https://github.com/dtolnay/syn) | `2.0.90` | `2.0.91` | | [libc](https://github.com/rust-lang/libc) | `0.2.168` | `0.2.169` | | [serde_json](https://github.com/serde-rs/json) | `1.0.133` | `1.0.134` | | [env_logger](https://github.com/rust-cli/env_logger) | `0.11.5` | `0.11.6` | | [aws-lc-rs](https://github.com/aws/aws-lc-rs) | `1.11.1` | `1.12.0` | | [aws-lc-fips-sys](https://github.com/aws/aws-lc-rs) | `0.12.15` | `0.13.0` | | [aws-lc-sys](https://github.com/aws/aws-lc-rs) | `0.23.1` | `0.24.0` | | [cc](https://github.com/rust-lang/cc-rs) | `1.2.4` | `1.2.5` | | [env_filter](https://github.com/rust-cli/env_logger) | `0.1.2` | `0.1.3` | Updates `thiserror` from 2.0.7 to 2.0.9 - [Release notes](https://github.com/dtolnay/thiserror/releases) - [Commits](https://github.com/dtolnay/thiserror/compare/2.0.7...2.0.9) Updates `syn` from 2.0.90 to 2.0.91 - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.90...2.0.91) Updates `libc` from 0.2.168 to 0.2.169 - [Release notes](https://github.com/rust-lang/libc/releases) - [Changelog](https://github.com/rust-lang/libc/blob/0.2.169/CHANGELOG.md) - [Commits](https://github.com/rust-lang/libc/compare/0.2.168...0.2.169) Updates `serde_json` from 1.0.133 to 1.0.134 - [Release notes](https://github.com/serde-rs/json/releases) - [Commits](https://github.com/serde-rs/json/compare/v1.0.133...v1.0.134) Updates `env_logger` from 0.11.5 to 0.11.6 - [Release notes](https://github.com/rust-cli/env_logger/releases) - 
[Changelog](https://github.com/rust-cli/env_logger/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-cli/env_logger/compare/v0.11.5...v0.11.6) Updates `aws-lc-rs` from 1.11.1 to 1.12.0 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/v1.11.1...v1.12.0) Updates `aws-lc-fips-sys` from 0.12.15 to 0.13.0 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/aws-lc-fips-sys/v0.12.15...aws-lc-fips-sys/v0.13.0) Updates `aws-lc-sys` from 0.23.1 to 0.24.0 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/aws-lc-sys/v0.23.1...aws-lc-sys/v0.24.0) Updates `cc` from 1.2.4 to 1.2.5 - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.4...cc-v1.2.5) Updates `env_filter` from 0.1.2 to 0.1.3 - [Release notes](https://github.com/rust-cli/env_logger/releases) - [Changelog](https://github.com/rust-cli/env_logger/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-cli/env_logger/compare/env_filter-v0.1.2...env_filter-v0.1.3) --- updated-dependencies: - dependency-name: thiserror dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: syn dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: libc dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: serde_json dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: env_logger dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: aws-lc-rs 
dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: aws-lc-fips-sys dependency-type: indirect update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: aws-lc-sys dependency-type: indirect update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: cc dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: env_filter dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker ... Signed-off-by: dependabot[bot] --- Cargo.lock | 65 ++++++++++++++-------------- src/acpi-tables/Cargo.toml | 2 +- src/clippy-tracing/Cargo.toml | 2 +- src/cpu-template-helper/Cargo.toml | 6 +-- src/firecracker/Cargo.toml | 10 ++--- src/jailer/Cargo.toml | 4 +- src/log-instrument-macros/Cargo.toml | 2 +- src/log-instrument/Cargo.toml | 2 +- src/rebase-snap/Cargo.toml | 4 +- src/seccompiler/Cargo.toml | 6 +-- src/snapshot-editor/Cargo.toml | 4 +- src/utils/Cargo.toml | 4 +- src/vmm/Cargo.toml | 8 ++-- 13 files changed, 60 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d499d3f16f8..0d0b995381e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,7 +7,7 @@ name = "acpi_tables" version = "0.1.0" dependencies = [ "displaydoc", - "thiserror 2.0.7", + "thiserror 2.0.9", "vm-memory", "zerocopy 0.8.13", ] @@ -125,9 +125,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-lc-fips-sys" -version = "0.12.15" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1e8a8e212a7851ef3d4c28cdfc017072bc684f0e0f57c7943ab60f695c3bfb" +checksum = "59057b878509d88952425fe694a2806e468612bde2d71943f3cd8034935b5032" dependencies = [ "bindgen 0.69.5", "cc", @@ -136,13 +136,14 @@ dependencies = [ "fs_extra", "libc", "paste", + "regex", ] [[package]] name = "aws-lc-rs" 
-version = "1.11.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47bb8cc16b669d267eeccf585aea077d0882f4777b1c1f740217885d6e6e5a3" +checksum = "f409eb70b561706bf8abba8ca9c112729c481595893fd06a2dd9af8ed8441148" dependencies = [ "aws-lc-fips-sys", "aws-lc-sys", @@ -153,9 +154,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.23.1" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2101df3813227bbaaaa0b04cd61c534c7954b22bd68d399b440be937dc63ff7" +checksum = "8478a5c29ead3f3be14aff8a202ad965cf7da6856860041bfca271becf8ba48b" dependencies = [ "bindgen 0.69.5", "cc", @@ -260,9 +261,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.4" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9157bbaa6b165880c27a4293a474c91cdcf265cc68cc829bf10be0964a391caf" +checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" dependencies = [ "jobserver", "libc", @@ -419,7 +420,7 @@ dependencies = [ "log-instrument", "serde", "serde_json", - "thiserror 2.0.7", + "thiserror 2.0.9", "vmm", "vmm-sys-util", ] @@ -550,9 +551,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "env_filter" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ "log", "regex", @@ -560,9 +561,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" +checksum = 
"dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" dependencies = [ "anstream", "anstyle", @@ -613,7 +614,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "thiserror 2.0.7", + "thiserror 2.0.9", "timerfd", "userfaultfd", "utils", @@ -798,7 +799,7 @@ dependencies = [ "libc", "log-instrument", "regex", - "thiserror 2.0.7", + "thiserror 2.0.9", "utils", "vmm-sys-util", ] @@ -849,9 +850,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.168" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libloading" @@ -1112,7 +1113,7 @@ dependencies = [ "displaydoc", "libc", "log-instrument", - "thiserror 2.0.7", + "thiserror 2.0.9", "utils", "vmm-sys-util", ] @@ -1190,7 +1191,7 @@ dependencies = [ "log-instrument", "serde", "serde_json", - "thiserror 2.0.7", + "thiserror 2.0.9", "utils", "vmm-sys-util", ] @@ -1226,9 +1227,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.134" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d" dependencies = [ "itoa", "memchr", @@ -1270,7 +1271,7 @@ dependencies = [ "libc", "log-instrument", "semver", - "thiserror 2.0.7", + "thiserror 2.0.9", "utils", "vmm", "vmm-sys-util", @@ -1290,9 +1291,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.90" +version = "2.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +checksum = 
"d53cbcb5a243bd33b7858b1d7f4aca2153490815872d86d955d6ea29f743c035" dependencies = [ "proc-macro2", "quote", @@ -1310,11 +1311,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.7" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" +checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" dependencies = [ - "thiserror-impl 2.0.7", + "thiserror-impl 2.0.9", ] [[package]] @@ -1330,9 +1331,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.7" +version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" +checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" dependencies = [ "proc-macro2", "quote", @@ -1470,7 +1471,7 @@ dependencies = [ "displaydoc", "libc", "log-instrument", - "thiserror 2.0.7", + "thiserror 2.0.9", ] [[package]] @@ -1581,7 +1582,7 @@ dependencies = [ "serde", "serde_json", "slab", - "thiserror 2.0.7", + "thiserror 2.0.9", "timerfd", "userfaultfd", "utils", diff --git a/src/acpi-tables/Cargo.toml b/src/acpi-tables/Cargo.toml index 0a7f92dee5f..7334406670a 100644 --- a/src/acpi-tables/Cargo.toml +++ b/src/acpi-tables/Cargo.toml @@ -8,7 +8,7 @@ license = "Apache-2.0" [dependencies] displaydoc = "0.2.5" -thiserror = "2.0.7" +thiserror = "2.0.9" vm-memory = { version = "0.16.1", features = ["backend-mmap", "backend-bitmap"] } zerocopy = { version = "0.8.13", features = ["derive"] } diff --git a/src/clippy-tracing/Cargo.toml b/src/clippy-tracing/Cargo.toml index e112095226a..92dbf249fc1 100644 --- a/src/clippy-tracing/Cargo.toml +++ b/src/clippy-tracing/Cargo.toml @@ -14,7 +14,7 @@ clap = { version = "4.5.23", features = ["derive"] } itertools = "0.13.0" proc-macro2 = { version = "1.0.92", features = ["span-locations"] } quote = "1.0.37" -syn = { version = 
"2.0.90", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } +syn = { version = "2.0.91", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } walkdir = "2.5.0" [dev-dependencies] diff --git a/src/cpu-template-helper/Cargo.toml b/src/cpu-template-helper/Cargo.toml index 5fd314bf48c..b4a8f8e3962 100644 --- a/src/cpu-template-helper/Cargo.toml +++ b/src/cpu-template-helper/Cargo.toml @@ -12,11 +12,11 @@ bench = false [dependencies] clap = { version = "4.5.23", features = ["derive", "string"] } displaydoc = "0.2.5" -libc = "0.2.168" +libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } serde = { version = "1.0.216", features = ["derive"] } -serde_json = "1.0.133" -thiserror = "2.0.7" +serde_json = "1.0.134" +thiserror = "2.0.9" vmm = { path = "../vmm" } vmm-sys-util = "0.12.1" diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index cbd9ffe5e2d..1f5a18d9b2e 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -18,15 +18,15 @@ bench = false [dependencies] displaydoc = "0.2.5" event-manager = "0.4.0" -libc = "0.2.168" +libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } seccompiler = { path = "../seccompiler" } serde = { version = "1.0.216", features = ["derive"] } serde_derive = "1.0.136" -serde_json = "1.0.133" -thiserror = "2.0.7" +serde_json = "1.0.134" +thiserror = "2.0.9" timerfd = "1.6.0" utils = { path = "../utils" } vmm = { path = "../vmm" } @@ -34,7 +34,7 @@ vmm-sys-util = { version = "0.12.1", features = ["with-serde"] } [dev-dependencies] cargo_toml = "0.21.0" -libc = "0.2.168" +libc = "0.2.169" regex = { version = "1.11.1", default-features = false, features = ["std", "unicode-perl"] } # Dev-Dependencies for uffd examples @@ -45,7 +45,7 @@ userfaultfd = "0.8.1" bincode = "1.2.1" seccompiler = { path = "../seccompiler" } serde = { version = 
"1.0.216" } -serde_json = "1.0.133" +serde_json = "1.0.134" [features] tracing = ["log-instrument", "seccompiler/tracing", "utils/tracing", "vmm/tracing"] diff --git a/src/jailer/Cargo.toml b/src/jailer/Cargo.toml index 955fe1af5b2..4c29f53a0e1 100644 --- a/src/jailer/Cargo.toml +++ b/src/jailer/Cargo.toml @@ -12,10 +12,10 @@ name = "jailer" bench = false [dependencies] -libc = "0.2.168" +libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } regex = { version = "1.11.1", default-features = false, features = ["std"] } -thiserror = "2.0.7" +thiserror = "2.0.9" vmm-sys-util = "0.12.1" utils = { path = "../utils" } diff --git a/src/log-instrument-macros/Cargo.toml b/src/log-instrument-macros/Cargo.toml index 2129df061f2..d1eb9c0b0d2 100644 --- a/src/log-instrument-macros/Cargo.toml +++ b/src/log-instrument-macros/Cargo.toml @@ -13,7 +13,7 @@ bench = false [dependencies] proc-macro2 = "1.0.92" quote = "1.0.37" -syn = { version = "2.0.90", features = ["full", "extra-traits"] } +syn = { version = "2.0.91", features = ["full", "extra-traits"] } [lints] workspace = true diff --git a/src/log-instrument/Cargo.toml b/src/log-instrument/Cargo.toml index e418a7cf886..d9ea9d7dd63 100644 --- a/src/log-instrument/Cargo.toml +++ b/src/log-instrument/Cargo.toml @@ -32,7 +32,7 @@ log = "0.4.22" log-instrument-macros = { path = "../log-instrument-macros" } [dev-dependencies] -env_logger = "0.11.5" +env_logger = "0.11.6" [lints] workspace = true diff --git a/src/rebase-snap/Cargo.toml b/src/rebase-snap/Cargo.toml index c435d2ae249..10d93be2712 100644 --- a/src/rebase-snap/Cargo.toml +++ b/src/rebase-snap/Cargo.toml @@ -11,9 +11,9 @@ bench = false [dependencies] displaydoc = "0.2.5" -libc = "0.2.168" +libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } -thiserror = "2.0.7" +thiserror = "2.0.9" vmm-sys-util = "0.12.1" utils = { path = "../utils" } diff --git a/src/seccompiler/Cargo.toml b/src/seccompiler/Cargo.toml index 
4c23f0ae595..1e5cf55866a 100644 --- a/src/seccompiler/Cargo.toml +++ b/src/seccompiler/Cargo.toml @@ -18,11 +18,11 @@ bench = false [dependencies] bincode = "1.2.1" displaydoc = "0.2.5" -libc = "0.2.168" +libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } serde = { version = "1.0.216", features = ["derive"] } -serde_json = "1.0.133" -thiserror = "2.0.7" +serde_json = "1.0.134" +thiserror = "2.0.9" utils = { path = "../utils" } diff --git a/src/snapshot-editor/Cargo.toml b/src/snapshot-editor/Cargo.toml index a199a8ab99f..f57268656c4 100644 --- a/src/snapshot-editor/Cargo.toml +++ b/src/snapshot-editor/Cargo.toml @@ -14,10 +14,10 @@ clap = { version = "4.5.23", features = ["derive", "string"] } displaydoc = "0.2.5" fc_utils = { package = "utils", path = "../utils" } -libc = "0.2.168" +libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } semver = "1.0.24" -thiserror = "2.0.7" +thiserror = "2.0.9" vmm = { path = "../vmm" } vmm-sys-util = "0.12.1" diff --git a/src/utils/Cargo.toml b/src/utils/Cargo.toml index c4509ec1e57..bf8a5092968 100644 --- a/src/utils/Cargo.toml +++ b/src/utils/Cargo.toml @@ -10,9 +10,9 @@ bench = false [dependencies] displaydoc = "0.2.5" -libc = "0.2.168" +libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } -thiserror = "2.0.7" +thiserror = "2.0.9" [features] tracing = ["log-instrument"] diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 043fc11528c..64e76bbc15d 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -12,7 +12,7 @@ bench = false acpi_tables = { path = "../acpi-tables" } aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } arrayvec = { version = "0.7.6", optional = true } -aws-lc-rs = { version = "1.11.1", features = ["bindgen"] } +aws-lc-rs = { version = "1.12.0", features = ["bindgen"] } base64 = "0.22.1" bincode = "1.2.1" bitflags = "2.6.0" @@ -24,7 +24,7 @@ gdbstub = { version = "0.7.3", optional = 
true } gdbstub_arch = { version = "0.3.1", optional = true } kvm-bindings = { version = "0.10.0", features = ["fam-wrappers", "serde"] } kvm-ioctls = "0.19.1" -libc = "0.2.168" +libc = "0.2.169" linux-loader = "0.13.0" log = { version = "0.4.22", features = ["std", "serde"] } log-instrument = { path = "../log-instrument", optional = true } @@ -34,9 +34,9 @@ micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } seccompiler = { path = "../seccompiler" } semver = { version = "1.0.24", features = ["serde"] } serde = { version = "1.0.216", features = ["derive", "rc"] } -serde_json = "1.0.133" +serde_json = "1.0.134" slab = "0.4.7" -thiserror = "2.0.7" +thiserror = "2.0.9" timerfd = "1.5.0" userfaultfd = "0.8.1" utils = { path = "../utils" } From 5e86a2c067d51775ad1a604b7220e59569772025 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Fri, 27 Dec 2024 16:31:22 +0000 Subject: [PATCH 03/78] chore: unpin m6i/6.1 ami The volatility is not visible on the latest AMI. Signed-off-by: Nikita Kalyazin --- .buildkite/pipeline_perf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/.buildkite/pipeline_perf.py b/.buildkite/pipeline_perf.py index 2a26f4277cc..e8169bfb2cd 100755 --- a/.buildkite/pipeline_perf.py +++ b/.buildkite/pipeline_perf.py @@ -124,8 +124,6 @@ pins = { # TODO: Unpin when performance instability on m6i/5.10 has gone. "linux_5.10-pinned": {"instance": "m6i.metal", "kv": "linux_5.10"}, - # TODO: Unpin when performance instability on m6i/6.1 has gone. - "linux_6.1-pinned": {"instance": "m6i.metal", "kv": "linux_6.1"}, } From 80b4cb4491273c76ec7c93746a36da9c7edd2388 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Tue, 24 Dec 2024 11:52:25 +0000 Subject: [PATCH 04/78] test(wrmsr): exclude MSR_IA32_SPEC_CTRL from check MSR_IA32_SPEC_CTRL is a dynamic MSR, whose value is constantly updated by the guest OS. It is not a good fit for the wrmsr test that targets static MSRs that retain their value for a long time. 
Signed-off-by: Nikita Kalyazin --- tests/data/msr/wrmsr_list.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/data/msr/wrmsr_list.txt b/tests/data/msr/wrmsr_list.txt index 87cc6ab1b66..a6a9e387aa4 100644 --- a/tests/data/msr/wrmsr_list.txt +++ b/tests/data/msr/wrmsr_list.txt @@ -1,5 +1,4 @@ 0x1b 0xfee00c00 -0x48 0x1 0x174 0x11 0x17a 0x1 0x1a0 0x0 From 5b706ed0450d5f52b231f1a608ddd258c7221d79 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Mon, 30 Dec 2024 14:17:34 +0000 Subject: [PATCH 05/78] test: Add pytest option to apply custom CPU template to any microVM Sometimes we would like to test a custom CPU template for debugging / testing purposes. The new pytest option `--custom-cpu-template` allows us to apply the given CPU template to any microVM in any test unless it is overwritten by a test. Since `cpu_template_any` yields not only CPU templates but also None, the given CPU template will be used in the None case. Signed-off-by: Takahiro Itazuri --- tests/conftest.py | 24 +++++++++++++++++++++++- tests/framework/microvm.py | 7 +++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 865fa0b44b2..fad3d072fe1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,7 @@ """ import inspect +import json import os import re import shutil @@ -66,6 +67,14 @@ def pytest_addoption(parser): help="use firecracker/jailer binaries from this directory instead of compiling from source", ) + parser.addoption( + "--custom-cpu-template", + action="store", + help="Path to custom CPU template to be applied unless overwritten by a test", + default=None, + type=Path, + ) + @pytest.hookimpl(wrapper=True, tryfirst=True) def pytest_runtest_makereport(item, call): # pylint:disable=unused-argument @@ -270,9 +279,22 @@ def microvm_factory(request, record_property, results_dir): fc_binary_path, jailer_binary_path = build_tools.get_firecracker_binaries() record_property("firecracker_bin", str(fc_binary_path)) + 
# If `--custom-cpu-template` option is provided, the given CPU template will + # be applied afterwards unless overwritten. + custom_cpu_template_path = request.config.getoption("--custom-cpu-template") + custom_cpu_template = ( + { + "name": custom_cpu_template_path.stem, + "template": json.loads(custom_cpu_template_path.read_text("utf-8")), + } + if custom_cpu_template_path + else None + ) # We could override the chroot base like so # jailer_kwargs={"chroot_base": "/srv/jailo"} - uvm_factory = MicroVMFactory(fc_binary_path, jailer_binary_path) + uvm_factory = MicroVMFactory( + fc_binary_path, jailer_binary_path, custom_cpu_template=custom_cpu_template + ) yield uvm_factory # if the test failed, save important files from the root of the uVM into `test_results` for troubleshooting diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 11fecf720a7..0903b689dfa 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -188,6 +188,7 @@ def __init__( monitor_memory: bool = True, jailer_kwargs: Optional[dict] = None, numa_node=None, + custom_cpu_template: Path = None, ): """Set up microVM attributes, paths, and data structures.""" # pylint: disable=too-many-statements @@ -246,6 +247,9 @@ def __init__( self.vcpus_count = None self.mem_size_bytes = None self.cpu_template_name = None + # The given custom CPU template will be set in basic_config() but could + # be overwritten via set_cpu_template(). 
+ self.custom_cpu_template = custom_cpu_template self._connections = [] @@ -748,6 +752,9 @@ def basic_config( self.vcpus_count = vcpu_count self.mem_size_bytes = mem_size_mib * 2**20 + if self.custom_cpu_template is not None: + self.set_cpu_template(self.custom_cpu_template) + if cpu_template is not None: self.set_cpu_template(cpu_template) From e70bfabe2c547cf34f940af24e99c3b48ab5205d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 30 Dec 2024 16:25:32 +0100 Subject: [PATCH 06/78] ci: print EC2 AMI in the pytest report header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is useful for validation tasks and when troubleshooting. Signed-off-by: Pablo Barbáchano --- tests/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index fad3d072fe1..fb0fe4d5752 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -76,6 +76,11 @@ def pytest_addoption(parser): ) +def pytest_report_header(): + """Pytest hook to print relevant metadata in the logs""" + return f"EC2 AMI: {global_props.ami}" + + @pytest.hookimpl(wrapper=True, tryfirst=True) def pytest_runtest_makereport(item, call): # pylint:disable=unused-argument """Plugin to get test results in fixtures From 846cdefcf57b47b5c35333e0d227fba0bc8ae46b Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 15 Nov 2024 10:47:56 +0000 Subject: [PATCH 07/78] refactor: replace PAGE_SIZE with GUEST/HOST_PAGE_SIZE Firecracker was assuming page sizes for both host and guest are 4K. But they can differ, so split into 2 values. 
Signed-off-by: Egor Lazarchuk --- src/vmm/src/arch/aarch64/mod.rs | 3 ++- src/vmm/src/arch/mod.rs | 7 +++++-- src/vmm/src/arch/x86_64/mod.rs | 2 +- src/vmm/src/builder.rs | 4 ++-- src/vmm/src/devices/virtio/iov_deque.rs | 8 ++++---- src/vmm/src/devices/virtio/iovec.rs | 6 +++--- src/vmm/src/gdb/target.rs | 6 +++--- src/vmm/src/vstate/vm.rs | 2 +- 8 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/vmm/src/arch/aarch64/mod.rs b/src/vmm/src/arch/aarch64/mod.rs index 08ca1b65edb..a409f330c56 100644 --- a/src/vmm/src/arch/aarch64/mod.rs +++ b/src/vmm/src/arch/aarch64/mod.rs @@ -94,7 +94,8 @@ pub fn initrd_load_addr( guest_mem: &GuestMemoryMmap, initrd_size: usize, ) -> Result { - let round_to_pagesize = |size| (size + (super::PAGE_SIZE - 1)) & !(super::PAGE_SIZE - 1); + let round_to_pagesize = + |size| (size + (super::GUEST_PAGE_SIZE - 1)) & !(super::GUEST_PAGE_SIZE - 1); match GuestAddress(get_fdt_addr(guest_mem)).checked_sub(round_to_pagesize(initrd_size) as u64) { Some(offset) => { if guest_mem.address_in_range(offset) { diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index f5a2f98cb7c..0f3bd6e46ef 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -52,8 +52,11 @@ pub struct InitrdConfig { pub size: usize, } -/// Default (smallest) memory page size for the supported architectures. -pub const PAGE_SIZE: usize = 4096; +/// Default page size for the guest OS. +pub const GUEST_PAGE_SIZE: usize = 4096; + +/// Default page size for the host OS. 
+pub const HOST_PAGE_SIZE: usize = 4096; impl fmt::Display for DeviceType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/vmm/src/arch/x86_64/mod.rs b/src/vmm/src/arch/x86_64/mod.rs index 1066d9734c3..40f4f15607a 100644 --- a/src/vmm/src/arch/x86_64/mod.rs +++ b/src/vmm/src/arch/x86_64/mod.rs @@ -97,7 +97,7 @@ pub fn initrd_load_addr( return Err(ConfigurationError::InitrdAddress); } - let align_to_pagesize = |address| address & !(super::PAGE_SIZE - 1); + let align_to_pagesize = |address| address & !(super::GUEST_PAGE_SIZE - 1); Ok(align_to_pagesize(lowmem_size - initrd_size) as u64) } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8594da9f077..6131e248d91 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1305,7 +1305,7 @@ pub(crate) mod tests { use crate::vstate::memory::GuestMemory; let image = make_test_bin(); - let mem_size: usize = image.len() * 2 + crate::arch::PAGE_SIZE; + let mem_size: usize = image.len() * 2 + crate::arch::GUEST_PAGE_SIZE; let tempfile = TempFile::new().unwrap(); let mut tempfile = tempfile.into_file(); @@ -1344,7 +1344,7 @@ pub(crate) mod tests { let tempfile = TempFile::new().unwrap(); let mut tempfile = tempfile.into_file(); tempfile.write_all(&image).unwrap(); - let gm = single_region_mem_at(crate::arch::PAGE_SIZE as u64 + 1, image.len() * 2); + let gm = single_region_mem_at(crate::arch::GUEST_PAGE_SIZE as u64 + 1, image.len() * 2); let res = load_initrd(&gm, &mut tempfile); assert!( diff --git a/src/vmm/src/devices/virtio/iov_deque.rs b/src/vmm/src/devices/virtio/iov_deque.rs index 51bf28a49eb..994ba5cd320 100644 --- a/src/vmm/src/devices/virtio/iov_deque.rs +++ b/src/vmm/src/devices/virtio/iov_deque.rs @@ -6,7 +6,7 @@ use std::os::fd::AsRawFd; use libc::{c_int, c_void, iovec, off_t, size_t}; use memfd; -use crate::arch::PAGE_SIZE; +use crate::arch::HOST_PAGE_SIZE; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum IovDequeError { @@ -79,8 +79,8 @@ pub enum 
IovDequeError { // ``` // // This value must be a multiple of 256 because this is the maximum number of `iovec` can fit into -// 1 memory page: 256 * sizeof(iovec) == 4096 == PAGE_SIZE. IovDeque only operates with `PAGE_SIZE` -// granularity. +// 1 memory page: 256 * sizeof(iovec) == 4096 == HOST_PAGE_SIZE. IovDeque only operates with +// `HOST_PAGE_SIZE` granularity. #[derive(Debug)] pub struct IovDeque { pub iov: *mut libc::iovec, @@ -93,7 +93,7 @@ unsafe impl Send for IovDeque {} impl IovDeque { const BYTES: usize = L as usize * std::mem::size_of::(); - const _ASSERT: () = assert!(Self::BYTES % PAGE_SIZE == 0); + const _ASSERT: () = assert!(Self::BYTES % HOST_PAGE_SIZE == 0); /// Create a [`memfd`] object that represents a single physical page fn create_memfd() -> Result { diff --git a/src/vmm/src/devices/virtio/iovec.rs b/src/vmm/src/devices/virtio/iovec.rs index 9262dff661b..c9893260d9e 100644 --- a/src/vmm/src/devices/virtio/iovec.rs +++ b/src/vmm/src/devices/virtio/iovec.rs @@ -815,13 +815,13 @@ mod verification { use vm_memory::VolatileSlice; use super::IoVecBuffer; + use crate::arch::GUEST_PAGE_SIZE; use crate::devices::virtio::iov_deque::IovDeque; // Redefine `IoVecBufferMut` and `IovDeque` with specific length. Otherwise // Rust will not know what to do. type IoVecBufferMutDefault = super::IoVecBufferMut; type IovDequeDefault = IovDeque; - use crate::arch::PAGE_SIZE; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; // Maximum memory size to use for our buffers. For the time being 1KB. 
@@ -912,8 +912,8 @@ mod verification { // SAFETY: safe because the layout has non-zero size let mem = unsafe { std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked( - 2 * PAGE_SIZE, - PAGE_SIZE, + 2 * GUEST_PAGE_SIZE, + GUEST_PAGE_SIZE, )) }; IovDequeDefault { diff --git a/src/vmm/src/gdb/target.rs b/src/vmm/src/gdb/target.rs index b8230342b27..6f3f2593c15 100644 --- a/src/vmm/src/gdb/target.rs +++ b/src/vmm/src/gdb/target.rs @@ -33,7 +33,7 @@ use vm_memory::{Bytes, GuestAddress, GuestMemoryError}; use super::arch; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::vcpu::VcpuError as AarchVcpuError; -use crate::arch::PAGE_SIZE; +use crate::arch::GUEST_PAGE_SIZE; use crate::logger::{error, info}; use crate::utils::u64_to_usize; use crate::vstate::vcpu::VcpuSendEventError; @@ -396,7 +396,7 @@ impl MultiThreadBase for FirecrackerTarget { // Compute the amount space left in the page after the gpa let read_len = std::cmp::min( data.len(), - PAGE_SIZE - (u64_to_usize(gpa) & (PAGE_SIZE - 1)), + GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); vmm.guest_memory() @@ -430,7 +430,7 @@ impl MultiThreadBase for FirecrackerTarget { // Compute the amount space left in the page after the gpa let write_len = std::cmp::min( data.len(), - PAGE_SIZE - (u64_to_usize(gpa) & (PAGE_SIZE - 1)), + GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); vmm.guest_memory() diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 3d24dc6f9ac..d213d4d7bb6 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -593,7 +593,7 @@ pub(crate) mod tests { let res = vm.set_kvm_memory_regions(&gm, false); res.unwrap(); - // Trying to set a memory region with a size that is not a multiple of PAGE_SIZE + // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE // will result in error. 
let gm = single_region_mem(0x10); let res = vm.set_kvm_memory_regions(&gm, false); From 7f614b188eb96e2876fdf4bfd56956b0dbb281c4 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 15 Nov 2024 11:59:35 +0000 Subject: [PATCH 08/78] feat: query host page size Define global variable with host page size and update it at the very beginning of the main function in Firecracker. This way data types which rely on specific host page size can adapt to it. Signed-off-by: Egor Lazarchuk --- src/firecracker/src/main.rs | 5 +++++ src/vmm/src/arch/mod.rs | 18 ++++++++++++++++-- src/vmm/src/devices/virtio/iov_deque.rs | 5 +++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 8fb5392afcf..300afd0ad66 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -20,6 +20,7 @@ use seccomp::FilterError; use seccompiler::BpfThreadMap; use utils::arg_parser::{ArgParser, Argument}; use utils::validators::validate_instance_id; +use vmm::arch::host_page_size; use vmm::builder::StartMicrovmError; use vmm::logger::{ debug, error, info, LoggerConfig, ProcessTimeReporter, StoreMetric, LOGGER, METRICS, @@ -108,6 +109,10 @@ fn main_exec() -> Result<(), MainError> { // Initialize the logger. LOGGER.init().map_err(MainError::SetLogger)?; + // First call to this function updates the value to current + // host page size. + _ = host_page_size(); + // We need this so that we can reset terminal to canonical mode if panic occurs. let stdin = io::stdin(); diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index 0f3bd6e46ef..a51055622e4 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt; +use std::sync::LazyLock; +use log::warn; use serde::{Deserialize, Serialize}; /// Module for aarch64 related functionality. @@ -55,8 +57,20 @@ pub struct InitrdConfig { /// Default page size for the guest OS. 
pub const GUEST_PAGE_SIZE: usize = 4096; -/// Default page size for the host OS. -pub const HOST_PAGE_SIZE: usize = 4096; +/// Get the size of the host page size. +pub fn host_page_size() -> usize { + /// Default page size for the host OS. + static PAGE_SIZE: LazyLock = LazyLock::new(|| { + // # Safety: Value always valid + let r = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; + usize::try_from(r).unwrap_or_else(|_| { + warn!("Could not get host page size with sysconf, assuming default 4K host pages"); + 4096 + }) + }); + + *PAGE_SIZE +} impl fmt::Display for DeviceType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { diff --git a/src/vmm/src/devices/virtio/iov_deque.rs b/src/vmm/src/devices/virtio/iov_deque.rs index 994ba5cd320..e3a2534994a 100644 --- a/src/vmm/src/devices/virtio/iov_deque.rs +++ b/src/vmm/src/devices/virtio/iov_deque.rs @@ -6,7 +6,7 @@ use std::os::fd::AsRawFd; use libc::{c_int, c_void, iovec, off_t, size_t}; use memfd; -use crate::arch::HOST_PAGE_SIZE; +use crate::arch::host_page_size; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum IovDequeError { @@ -93,7 +93,6 @@ unsafe impl Send for IovDeque {} impl IovDeque { const BYTES: usize = L as usize * std::mem::size_of::(); - const _ASSERT: () = assert!(Self::BYTES % HOST_PAGE_SIZE == 0); /// Create a [`memfd`] object that represents a single physical page fn create_memfd() -> Result { @@ -153,6 +152,8 @@ impl IovDeque { /// Create a new [`IovDeque`] that can hold memory described by a single VirtIO queue. pub fn new() -> Result { + assert!(Self::BYTES % host_page_size() == 0); + let memfd = Self::create_memfd()?; let raw_memfd = memfd.as_file().as_raw_fd(); let buffer = Self::allocate_ring_buffer_memory()?; From 0123d4c0fcb5037bfda9335590e2f916ddec4f7a Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 15 Nov 2024 12:07:06 +0000 Subject: [PATCH 09/78] feat: update IovDeque to support arbitrary size and host page size Remove restriction on size and host page size. 
Signed-off-by: Egor Lazarchuk --- src/vmm/src/devices/virtio/iov_deque.rs | 57 ++++++++++++++++--------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/src/vmm/src/devices/virtio/iov_deque.rs b/src/vmm/src/devices/virtio/iov_deque.rs index e3a2534994a..b28d7076f43 100644 --- a/src/vmm/src/devices/virtio/iov_deque.rs +++ b/src/vmm/src/devices/virtio/iov_deque.rs @@ -77,10 +77,7 @@ pub enum IovDequeError { // pub iov_len: ::size_t, // } // ``` -// -// This value must be a multiple of 256 because this is the maximum number of `iovec` can fit into -// 1 memory page: 256 * sizeof(iovec) == 4096 == HOST_PAGE_SIZE. IovDeque only operates with -// `HOST_PAGE_SIZE` granularity. + #[derive(Debug)] pub struct IovDeque { pub iov: *mut libc::iovec, @@ -92,17 +89,15 @@ pub struct IovDeque { unsafe impl Send for IovDeque {} impl IovDeque { - const BYTES: usize = L as usize * std::mem::size_of::(); - /// Create a [`memfd`] object that represents a single physical page - fn create_memfd() -> Result { + fn create_memfd(pages_bytes: usize) -> Result { // Create a sealable memfd. let opts = memfd::MemfdOptions::default().allow_sealing(true); let mfd = opts.create("iov_deque")?; // Resize to system page size. mfd.as_file() - .set_len(Self::BYTES.try_into().unwrap()) + .set_len(pages_bytes.try_into().unwrap()) .map_err(IovDequeError::MemfdResize)?; // Add seals to prevent further resizing. @@ -135,13 +130,13 @@ impl IovDeque { /// Allocate memory for our ring buffer /// - /// This will allocate 2 * `Self::BYTES` bytes of virtual memory. - fn allocate_ring_buffer_memory() -> Result<*mut c_void, IovDequeError> { + /// This will allocate 2 * `pages_bytes` bytes of virtual memory. 
+ fn allocate_ring_buffer_memory(pages_bytes: usize) -> Result<*mut c_void, IovDequeError> { // SAFETY: We are calling the system call with valid arguments unsafe { Self::mmap( std::ptr::null_mut(), - Self::BYTES * 2, + pages_bytes * 2, libc::PROT_NONE, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, -1, @@ -150,20 +145,29 @@ impl IovDeque { } } + /// Calculate a number of bytes in full pages required for + /// the type to operate. + fn pages_bytes() -> usize { + let host_page_size = host_page_size(); + let bytes = L as usize * std::mem::size_of::(); + let num_host_pages = bytes.div_ceil(host_page_size); + num_host_pages * host_page_size + } + /// Create a new [`IovDeque`] that can hold memory described by a single VirtIO queue. pub fn new() -> Result { - assert!(Self::BYTES % host_page_size() == 0); + let pages_bytes = Self::pages_bytes(); - let memfd = Self::create_memfd()?; + let memfd = Self::create_memfd(pages_bytes)?; let raw_memfd = memfd.as_file().as_raw_fd(); - let buffer = Self::allocate_ring_buffer_memory()?; + let buffer = Self::allocate_ring_buffer_memory(pages_bytes)?; // Map the first page of virtual memory to the physical page described by the memfd object // SAFETY: We are calling the system call with valid arguments let _ = unsafe { Self::mmap( buffer, - Self::BYTES, + pages_bytes, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED | libc::MAP_FIXED, raw_memfd, @@ -174,17 +178,17 @@ impl IovDeque { // Map the second page of virtual memory to the physical page described by the memfd object // // SAFETY: This is safe because: - // * Both `buffer` and the result of `buffer.add(Self::BYTES)` are within bounds of the + // * Both `buffer` and the result of `buffer.add(pages_bytes)` are within bounds of the // allocation we got from `Self::allocate_ring_buffer_memory`. // * The resulting pointer is the beginning of the second page of our allocation, so it // doesn't wrap around the address space. 
- let next_page = unsafe { buffer.add(Self::BYTES) }; + let next_page = unsafe { buffer.add(pages_bytes) }; // SAFETY: We are calling the system call with valid arguments let _ = unsafe { Self::mmap( next_page, - Self::BYTES, + pages_bytes, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED | libc::MAP_FIXED, raw_memfd, @@ -312,9 +316,10 @@ impl IovDeque { impl Drop for IovDeque { fn drop(&mut self) { + let pages_bytes = Self::pages_bytes(); // SAFETY: We are passing an address that we got from a previous allocation of `2 * - // Self::BYTES` bytes by calling mmap - let _ = unsafe { libc::munmap(self.iov.cast(), Self::BYTES * 2) }; + // pages_bytes` by calling mmap + let _ = unsafe { libc::munmap(self.iov.cast(), 2 * pages_bytes) }; } } @@ -332,6 +337,18 @@ mod tests { assert_eq!(deque.len(), 0); } + #[test] + fn test_new_less_than_page() { + let deque = super::IovDeque::<128>::new().unwrap(); + assert_eq!(deque.len(), 0); + } + + #[test] + fn test_new_more_than_page() { + let deque = super::IovDeque::<512>::new().unwrap(); + assert_eq!(deque.len(), 0); + } + fn make_iovec(id: u16, len: u16) -> iovec { iovec { iov_base: id as *mut libc::c_void, From 11d8a01365bfa7d472e75e1dde0ac05e00fe19eb Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Thu, 21 Nov 2024 17:13:31 +0000 Subject: [PATCH 10/78] chore: add `IovDeque` fix to the CHANGELOG Add note about making `IovDeque` to work with any host page size. Signed-off-by: Egor Lazarchuk --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65ef73a1e1f..e402b85ad43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,9 @@ and this project adheres to - [#4921](https://github.com/firecracker-microvm/firecracker/pull/4921): Fixed swagger `CpuConfig` definition to include missing aarch64-specific fields. +- [#4916](https://github.com/firecracker-microvm/firecracker/pull/4916): Fixed + `IovDeque` implementation to work with any host page size. 
This fixes + virtio-net device on non 4K host kernels. ## [1.10.1] From cea24c52494b95f30e270b4cfb3a20872b02e515 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Thu, 2 Jan 2025 16:51:48 +0100 Subject: [PATCH 11/78] ci: fix passing simple values to step parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In c6add4fccf27cd0522a7e49a2c5dc67ab7067980 while adding support for more complex parameters we broke passing simple strings. While it can be worked around by quoting the string with '', that makes the simple case more complex than it was before, and breaks previous pipeline definitions. Fix it so if the first character does not look like a python expression, take the value verbatim. Fixes: c6add4fccf27cd0522a7e49a2c5dc67ab7067980 Signed-off-by: Pablo Barbáchano --- .buildkite/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.buildkite/common.py b/.buildkite/common.py index 342bd671ddc..bda1ec9eccf 100644 --- a/.buildkite/common.py +++ b/.buildkite/common.py @@ -139,7 +139,8 @@ def __call__(self, parser, namespace, value, option_string=None): res = getattr(namespace, self.dest, {}) key_str, val = value.split("=", maxsplit=1) keys = key_str.split("/") - update = {keys[-1]: ast.literal_eval(val)} + # Interpret it as a literal iff it starts like one + update = {keys[-1]: ast.literal_eval(val) if val[0] in "[{'" else val} for key in list(reversed(keys))[1:]: update = {key: update} res = overlay_dict(res, update) From 7b01bdc1e44d5eb4f62aa89c2a79a05331a7ee1e Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Thu, 2 Jan 2025 11:45:37 +0000 Subject: [PATCH 12/78] chore(vmm): Add pointer alignment check for Queue Raw pointers of `struct Queue` are assumed to be aligned properly; otherwise some methods (e.g. `read_volatile()`) will panic. Such a misalignment is possible when restored from a broken/fuzzed snapshot.
Add pointer alignment check and exit with an error early instead of panicking. Signed-off-by: Takahiro Itazuri --- src/vmm/src/devices/virtio/queue.rs | 95 +++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 6 deletions(-) diff --git a/src/vmm/src/devices/virtio/queue.rs b/src/vmm/src/devices/virtio/queue.rs index 7c2c78d2407..7f595d7d9f7 100644 --- a/src/vmm/src/devices/virtio/queue.rs +++ b/src/vmm/src/devices/virtio/queue.rs @@ -34,6 +34,8 @@ pub enum QueueError { DescIndexOutOfBounds(u16), /// Failed to write value into the virtio queue used ring: {0} MemoryError(#[from] vm_memory::GuestMemoryError), + /// Pointer is not aligned properly: {0:#x} not {1}-byte aligned. + PointerNotAligned(usize, u8), } /// A virtio descriptor constraints with C representative. @@ -323,11 +325,36 @@ impl Queue { .get_slice_ptr(mem, self.used_ring_address, self.used_ring_size())? .cast(); - // Disable it for kani tests, otherwise it will hit this assertion - // and fail. - #[cfg(not(kani))] - if self.actual_size() < self.len() { - return Err(QueueError::InvalidQueueSize(self.len(), self.actual_size())); + // All the above pointers are expected to be aligned properly; otherwise some methods (e.g. + // `read_volatile()`) will panic. Such a misalignment is possible when restored from a + // broken/fuzzed snapshot.
+ // + // Specification of those pointers' alignments + // https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-350007 + // > ================ ========== + // > Virtqueue Part Alignment + // > ================ ========== + // > Descriptor Table 16 + // > Available Ring 2 + // > Used Ring 4 + // > ================ ========== + if !self.desc_table_ptr.cast::().is_aligned() { + return Err(QueueError::PointerNotAligned( + self.desc_table_ptr as usize, + 16, + )); + } + if !self.avail_ring_ptr.is_aligned() { + return Err(QueueError::PointerNotAligned( + self.avail_ring_ptr as usize, + 2, + )); + } + if !self.used_ring_ptr.cast::().is_aligned() { + return Err(QueueError::PointerNotAligned( + self.used_ring_ptr as usize, + 4, + )); } Ok(()) @@ -1234,7 +1261,6 @@ mod verification { #[cfg(test)] mod tests { - use vm_memory::Bytes; pub use super::*; @@ -1644,6 +1670,63 @@ mod tests { assert_eq!(q.used_ring_avail_event_get(), 1); } + #[test] + fn test_initialize_with_aligned_pointer() { + let mut q = Queue::new(0); + + let random_addr = 0x321; + // Descriptor table must be 16-byte aligned. + q.desc_table_address = GuestAddress(random_addr / 16 * 16); + // Available ring must be 2-byte aligned. + q.avail_ring_address = GuestAddress(random_addr / 2 * 2); + // Used ring must be 4-byte aligned. + q.used_ring_address = GuestAddress(random_addr / 4 * 4); + + let mem = single_region_mem(0x1000); + q.initialize(&mem).unwrap(); + } + + #[test] + fn test_initialize_with_misaligned_pointer() { + let mut q = Queue::new(0); + let mem = single_region_mem(0x1000); + + // Descriptor table must be 16-byte aligned.
+ q.desc_table_address = GuestAddress(0xb); + match q.initialize(&mem) { + Ok(_) => panic!("Unexpected success"), + Err(QueueError::PointerNotAligned(addr, alignment)) => { + assert_eq!(addr % 16, 0xb); + assert_eq!(alignment, 16); + } + Err(e) => panic!("Unexpected error {e:#?}"), + } + q.desc_table_address = GuestAddress(0x0); + + // Available ring must be 2-byte aligned. + q.avail_ring_address = GuestAddress(0x1); + match q.initialize(&mem) { + Ok(_) => panic!("Unexpected success"), + Err(QueueError::PointerNotAligned(addr, alignment)) => { + assert_eq!(addr % 2, 0x1); + assert_eq!(alignment, 2); + } + Err(e) => panic!("Unexpected error {e:#?}"), + } + q.avail_ring_address = GuestAddress(0x0); + + // Used ring must be 4-byte aligned. + q.used_ring_address = GuestAddress(0x3); + match q.initialize(&mem) { + Ok(_) => panic!("unexpected success"), + Err(QueueError::PointerNotAligned(addr, alignment)) => { + assert_eq!(addr % 4, 0x3); + assert_eq!(alignment, 4); + } + Err(e) => panic!("Unexpected error {e:#?}"), + } + } + #[test] fn test_queue_error_display() { let err = QueueError::MemoryError(vm_memory::GuestMemoryError::InvalidGuestAddress( From 02ee97e2d458322c9f4b68e13426e51c916b11fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 16:27:15 +0000 Subject: [PATCH 13/78] build(deps): Bump the firecracker group with 7 updates Bumps the firecracker group with 7 updates: | Package | From | To | | --- | --- | --- | | [zerocopy](https://github.com/google/zerocopy) | `0.8.13` | `0.8.14` | | [quote](https://github.com/dtolnay/quote) | `1.0.37` | `1.0.38` | | [syn](https://github.com/dtolnay/syn) | `2.0.91` | `2.0.93` | | [serde](https://github.com/serde-rs/serde) | `1.0.216` | `1.0.217` | | [serde_derive](https://github.com/serde-rs/serde) | `1.0.216` | `1.0.217` | | [cc](https://github.com/rust-lang/cc-rs) | `1.2.5` | `1.2.6` | | [glob](https://github.com/rust-lang/glob) | `0.3.1` | `0.3.2` | 
Updates `zerocopy` from 0.8.13 to 0.8.14 - [Release notes](https://github.com/google/zerocopy/releases) - [Changelog](https://github.com/google/zerocopy/blob/main/CHANGELOG.md) - [Commits](https://github.com/google/zerocopy/compare/v0.8.13...v0.8.14) Updates `quote` from 1.0.37 to 1.0.38 - [Release notes](https://github.com/dtolnay/quote/releases) - [Commits](https://github.com/dtolnay/quote/compare/1.0.37...1.0.38) Updates `syn` from 2.0.91 to 2.0.93 - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.91...2.0.93) Updates `serde` from 1.0.216 to 1.0.217 - [Release notes](https://github.com/serde-rs/serde/releases) - [Commits](https://github.com/serde-rs/serde/compare/v1.0.216...v1.0.217) Updates `serde_derive` from 1.0.216 to 1.0.217 - [Release notes](https://github.com/serde-rs/serde/releases) - [Commits](https://github.com/serde-rs/serde/compare/v1.0.216...v1.0.217) Updates `cc` from 1.2.5 to 1.2.6 - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.5...cc-v1.2.6) Updates `glob` from 0.3.1 to 0.3.2 - [Release notes](https://github.com/rust-lang/glob/releases) - [Changelog](https://github.com/rust-lang/glob/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/glob/compare/0.3.1...v0.3.2) --- updated-dependencies: - dependency-name: zerocopy dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: quote dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: syn dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: serde dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - 
dependency-name: serde_derive dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: cc dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: glob dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker ... Signed-off-by: dependabot[bot] --- Cargo.lock | 38 ++++++++++++++-------------- src/acpi-tables/Cargo.toml | 2 +- src/clippy-tracing/Cargo.toml | 4 +-- src/cpu-template-helper/Cargo.toml | 2 +- src/firecracker/Cargo.toml | 6 ++--- src/log-instrument-macros/Cargo.toml | 4 +-- src/seccompiler/Cargo.toml | 2 +- src/vmm/Cargo.toml | 4 +-- 8 files changed, 31 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0d0b995381e..b4f5012f7ed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,7 +9,7 @@ dependencies = [ "displaydoc", "thiserror 2.0.9", "vm-memory", - "zerocopy 0.8.13", + "zerocopy 0.8.14", ] [[package]] @@ -261,9 +261,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" +checksum = "8d6dbb628b8f8555f86d0323c2eb39e3ec81901f4b83e091db8a6a76d316a333" dependencies = [ "jobserver", "libc", @@ -685,9 +685,9 @@ dependencies = [ [[package]] name = "glob" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "half" @@ -1060,9 +1060,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -1207,18 +1207,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.216" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", @@ -1291,9 +1291,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.91" +version = "2.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53cbcb5a243bd33b7858b1d7f4aca2153490815872d86d955d6ea29f743c035" +checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058" dependencies = [ "proc-macro2", "quote", @@ -1592,7 +1592,7 @@ dependencies = [ "vm-memory", "vm-superio", "vmm-sys-util", - "zerocopy 0.8.13", + "zerocopy 0.8.14", ] [[package]] @@ -1769,11 +1769,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.13" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67914ab451f3bfd2e69e5e9d2ef3858484e7074d63f204fd166ec391b54de21d" +checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468" dependencies = [ - "zerocopy-derive 0.8.13", + "zerocopy-derive 0.8.14", ] [[package]] @@ -1789,9 +1789,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.13" +version = "0.8.14" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7988d73a4303ca289df03316bc490e934accf371af6bc745393cf3c2c5c4f25d" +checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1" dependencies = [ "proc-macro2", "quote", diff --git a/src/acpi-tables/Cargo.toml b/src/acpi-tables/Cargo.toml index 7334406670a..24924b0a691 100644 --- a/src/acpi-tables/Cargo.toml +++ b/src/acpi-tables/Cargo.toml @@ -10,7 +10,7 @@ license = "Apache-2.0" displaydoc = "0.2.5" thiserror = "2.0.9" vm-memory = { version = "0.16.1", features = ["backend-mmap", "backend-bitmap"] } -zerocopy = { version = "0.8.13", features = ["derive"] } +zerocopy = { version = "0.8.14", features = ["derive"] } [lib] bench = false diff --git a/src/clippy-tracing/Cargo.toml b/src/clippy-tracing/Cargo.toml index 92dbf249fc1..626f23876a8 100644 --- a/src/clippy-tracing/Cargo.toml +++ b/src/clippy-tracing/Cargo.toml @@ -13,8 +13,8 @@ bench = false clap = { version = "4.5.23", features = ["derive"] } itertools = "0.13.0" proc-macro2 = { version = "1.0.92", features = ["span-locations"] } -quote = "1.0.37" -syn = { version = "2.0.91", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } +quote = "1.0.38" +syn = { version = "2.0.93", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } walkdir = "2.5.0" [dev-dependencies] diff --git a/src/cpu-template-helper/Cargo.toml b/src/cpu-template-helper/Cargo.toml index b4a8f8e3962..a2075f6b3f3 100644 --- a/src/cpu-template-helper/Cargo.toml +++ b/src/cpu-template-helper/Cargo.toml @@ -14,7 +14,7 @@ clap = { version = "4.5.23", features = ["derive", "string"] } displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } -serde = { version = "1.0.216", features = ["derive"] } +serde = { version = "1.0.217", features = ["derive"] } serde_json = "1.0.134" thiserror = "2.0.9" diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index 
1f5a18d9b2e..3f9894f0144 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -23,7 +23,7 @@ log-instrument = { path = "../log-instrument", optional = true } micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } seccompiler = { path = "../seccompiler" } -serde = { version = "1.0.216", features = ["derive"] } +serde = { version = "1.0.217", features = ["derive"] } serde_derive = "1.0.136" serde_json = "1.0.134" thiserror = "2.0.9" @@ -38,13 +38,13 @@ libc = "0.2.169" regex = { version = "1.11.1", default-features = false, features = ["std", "unicode-perl"] } # Dev-Dependencies for uffd examples -serde = { version = "1.0.216", features = ["derive"] } +serde = { version = "1.0.217", features = ["derive"] } userfaultfd = "0.8.1" [build-dependencies] bincode = "1.2.1" seccompiler = { path = "../seccompiler" } -serde = { version = "1.0.216" } +serde = { version = "1.0.217" } serde_json = "1.0.134" [features] diff --git a/src/log-instrument-macros/Cargo.toml b/src/log-instrument-macros/Cargo.toml index d1eb9c0b0d2..e3937bbb02d 100644 --- a/src/log-instrument-macros/Cargo.toml +++ b/src/log-instrument-macros/Cargo.toml @@ -12,8 +12,8 @@ bench = false [dependencies] proc-macro2 = "1.0.92" -quote = "1.0.37" -syn = { version = "2.0.91", features = ["full", "extra-traits"] } +quote = "1.0.38" +syn = { version = "2.0.93", features = ["full", "extra-traits"] } [lints] workspace = true diff --git a/src/seccompiler/Cargo.toml b/src/seccompiler/Cargo.toml index 1e5cf55866a..67197bfb82b 100644 --- a/src/seccompiler/Cargo.toml +++ b/src/seccompiler/Cargo.toml @@ -20,7 +20,7 @@ bincode = "1.2.1" displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } -serde = { version = "1.0.216", features = ["derive"] } +serde = { version = "1.0.217", features = ["derive"] } serde_json = "1.0.134" thiserror = "2.0.9" diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 64e76bbc15d..f024420e8ca 100644 --- 
a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -33,7 +33,7 @@ micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } seccompiler = { path = "../seccompiler" } semver = { version = "1.0.24", features = ["serde"] } -serde = { version = "1.0.216", features = ["derive", "rc"] } +serde = { version = "1.0.217", features = ["derive", "rc"] } serde_json = "1.0.134" slab = "0.4.7" thiserror = "2.0.9" @@ -45,7 +45,7 @@ vm-allocator = "0.1.0" vm-memory = { version = "0.16.1", features = ["backend-mmap", "backend-bitmap"] } vm-superio = "0.8.0" vmm-sys-util = { version = "0.12.1", features = ["with-serde"] } -zerocopy = { version = "0.8.13" } +zerocopy = { version = "0.8.14" } [target.'cfg(target_arch = "aarch64")'.dependencies] vm-fdt = "0.3.0" From da47e8f1bb9773162efdddb33bc83901c9eef069 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:39:03 +0000 Subject: [PATCH 14/78] build(deps): Bump the firecracker group with 6 updates Bumps the firecracker group with 6 updates: | Package | From | To | | --- | --- | --- | | [itertools](https://github.com/rust-itertools/itertools) | `0.13.0` | `0.14.0` | | [syn](https://github.com/dtolnay/syn) | `2.0.93` | `2.0.95` | | [aws-lc-sys](https://github.com/aws/aws-lc-rs) | `0.24.0` | `0.24.1` | | [cc](https://github.com/rust-lang/cc-rs) | `1.2.6` | `1.2.7` | | [prettyplease](https://github.com/dtolnay/prettyplease) | `0.2.25` | `0.2.27` | | [winnow](https://github.com/winnow-rs/winnow) | `0.6.20` | `0.6.22` | Updates `itertools` from 0.13.0 to 0.14.0 - [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-itertools/itertools/compare/v0.13.0...v0.14.0) Updates `syn` from 2.0.93 to 2.0.95 - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.93...2.0.95) Updates `aws-lc-sys` from 0.24.0 to 0.24.1 - [Release 
notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/aws-lc-sys/v0.24.0...aws-lc-sys/v0.24.1) Updates `cc` from 1.2.6 to 1.2.7 - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.6...cc-v1.2.7) Updates `prettyplease` from 0.2.25 to 0.2.27 - [Release notes](https://github.com/dtolnay/prettyplease/releases) - [Commits](https://github.com/dtolnay/prettyplease/compare/0.2.25...0.2.27) Updates `winnow` from 0.6.20 to 0.6.22 - [Changelog](https://github.com/winnow-rs/winnow/blob/main/CHANGELOG.md) - [Commits](https://github.com/winnow-rs/winnow/compare/v0.6.20...v0.6.22) --- updated-dependencies: - dependency-name: itertools dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: syn dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: aws-lc-sys dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: cc dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: prettyplease dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: winnow dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 35 ++++++++++++++-------------- src/clippy-tracing/Cargo.toml | 4 ++-- src/log-instrument-macros/Cargo.toml | 2 +- src/vmm/Cargo.toml | 2 +- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b4f5012f7ed..8fc1d38d933 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,16 +154,15 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.24.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8478a5c29ead3f3be14aff8a202ad965cf7da6856860041bfca271becf8ba48b" +checksum = "923ded50f602b3007e5e63e3f094c479d9c8a9b42d7f4034e4afe456aa48bfd2" dependencies = [ "bindgen 0.69.5", "cc", "cmake", "dunce", "fs_extra", - "libc", "paste", ] @@ -261,9 +260,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.6" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d6dbb628b8f8555f86d0323c2eb39e3ec81901f4b83e091db8a6a76d316a333" +checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" dependencies = [ "jobserver", "libc", @@ -387,7 +386,7 @@ name = "clippy-tracing" version = "0.1.0" dependencies = [ "clap", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn", @@ -585,7 +584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -779,9 +778,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" dependencies = [ "either", ] @@ -1025,9 +1024,9 @@ 
dependencies = [ [[package]] name = "prettyplease" -version = "0.2.25" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" dependencies = [ "proc-macro2", "syn", @@ -1163,7 +1162,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1291,9 +1290,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.93" +version = "2.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058" +checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" dependencies = [ "proc-macro2", "quote", @@ -1567,7 +1566,7 @@ dependencies = [ "event-manager", "gdbstub", "gdbstub_arch", - "itertools 0.13.0", + "itertools 0.14.0", "kvm-bindings", "kvm-ioctls", "libc", @@ -1657,7 +1656,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -1750,9 +1749,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.20" +version = "0.6.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980" dependencies = [ "memchr", ] diff --git a/src/clippy-tracing/Cargo.toml b/src/clippy-tracing/Cargo.toml index 626f23876a8..09c03b36b29 100644 --- a/src/clippy-tracing/Cargo.toml +++ b/src/clippy-tracing/Cargo.toml @@ -11,10 +11,10 @@ bench = false [dependencies] 
clap = { version = "4.5.23", features = ["derive"] } -itertools = "0.13.0" +itertools = "0.14.0" proc-macro2 = { version = "1.0.92", features = ["span-locations"] } quote = "1.0.38" -syn = { version = "2.0.93", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } +syn = { version = "2.0.95", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } walkdir = "2.5.0" [dev-dependencies] diff --git a/src/log-instrument-macros/Cargo.toml b/src/log-instrument-macros/Cargo.toml index e3937bbb02d..549a851a0da 100644 --- a/src/log-instrument-macros/Cargo.toml +++ b/src/log-instrument-macros/Cargo.toml @@ -13,7 +13,7 @@ bench = false [dependencies] proc-macro2 = "1.0.92" quote = "1.0.38" -syn = { version = "2.0.93", features = ["full", "extra-traits"] } +syn = { version = "2.0.95", features = ["full", "extra-traits"] } [lints] workspace = true diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index f024420e8ca..c73b090a2dc 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -53,7 +53,7 @@ vm-fdt = "0.3.0" [dev-dependencies] criterion = { version = "0.5.0", default-features = false } device_tree = "1.1.0" -itertools = "0.13.0" +itertools = "0.14.0" proptest = { version = "1.6.0", default-features = false, features = ["std"] } [features] From f610cae59a5d5a174e22ccb4a6d2befa530d1c49 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 7 Jan 2025 17:16:35 +0000 Subject: [PATCH 15/78] fix: stop messing with C-states in performance tests When running performance tests, the devtool script was disabling lower C-states, leaving only C0 (e.g. "CPU actively polling for new instructions") enabled, with the goal to reduce latencies of switching between C-states, and therefore increase test stability. However, we found out that on m6i (Icelake) this was causing performance to become erratic, with most performance metrics taking on bi-modal characteristics. 
Since back when we originally disabled sleep states, we did not see any impact on metrics, the theory that it helps with test stability doesn't have any experimental support, so removing the C-state configuration to fix the volatility on m6i is not expected to have any drawbacks. Signed-off-by: Patrick Roy --- tools/devtool | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/tools/devtool b/tools/devtool index a55fda81269..9e71cc6191f 100755 --- a/tools/devtool +++ b/tools/devtool @@ -630,11 +630,10 @@ apply_linux_61_tweaks() { } -# Modifies the processors C- and P-state configuration (x86_64 only) for consistent performance. This means +# Modifies the processors CPU governor and P-state configuration (x86_64 only) for consistent performance. This means # - Disable turbo boost (Intel only) by writing 1 to /sys/devices/system/cpu/intel_pstate/no_turbo # - Disable turbo boost (AMD only) by writing 0 to /sys/devices/system/cpu/cpufreq/boost # - Lock the CPUs' P-state to the highest non-turbo one (Intel only) by writing 100 to /sys/devices/system/cpu/intel_pstate/{min,max}_perf_pct -# - Disable all idle C-states, meaning all CPu cores will idle by polling (busy looping) by writing 1 to /sys/devices/system/cpu/cpu*/cpuidle/state*/disable # - Set the cpu frequency governor to performance by writing "performance" to /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor apply_performance_tweaks() { # m6a instances do not support the amd_pstate driver (yet), so nothing we can do there @@ -660,15 +659,6 @@ apply_performance_tweaks() { # their maximum safe frequency. It seems to be the default for Amazon Linux, but it doesn't hurt to make this explicit. # See also https://wiki.archlinux.org/title/CPU_frequency_scaling echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor &> /dev/null - - # When a CPU core has nothing to do, it enters an idle state, also called "C-state". 
These are enumerated, with C0 - # being the shallowest idle state (corresponding to "currently executing instructions", aka "not actually idling"), - # and higher numbers being deeper sleep states (how many there are depends on the specific processor). The deeper - # a C-state a CPU core enters, the higher the latency to wake it up again. We can disable deeper C-states altogether - # by forcing each CPU core to constantly stay in C-0 (e.g. have them actively poll for new things to do). - # See also https://www.kernel.org/doc/html/v5.0/admin-guide/pm/cpuidle.html. - # The below also set "disable=1" on "state0", but this does not do anything (as disabling C-0 makes no sense). - echo 1 |sudo tee /sys/devices/system/cpu/cpu*/cpuidle/state*/disable &> /dev/null } unapply_performance_tweaks() { @@ -683,9 +673,6 @@ unapply_performance_tweaks() { echo 1 | sudo tee /sys/devices/system/cpu/cpufreq/boost &> /dev/null fi - # reenable deeper sleep states - echo 0 | sudo tee /sys/devices/system/cpu/cpu*/cpuidle/state*/disable &>/dev/null - # We do not reset the governor, as keeping track of each CPUs configured governor is not trivial here. On our CI # instances, the performance governor is current the default anyway (2023/11/14) } From 9ab3358e6631d7fa1905bb6eb3ce1feb4c6aace6 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 8 Jan 2025 10:47:20 +0000 Subject: [PATCH 16/78] chore: unpin cargo-deny and specify tag for libseccomp Unpin cargo-deny because rust tool-chain has been upgraded. Specify tag for libseccomp to have a fixed version. 
Signed-off-by: Egor Lazarchuk --- tools/devctr/Dockerfile | 10 ++++++++-- tools/devtool | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/devctr/Dockerfile b/tools/devctr/Dockerfile index 9340c4c9c98..a9f6710a8a7 100644 --- a/tools/devctr/Dockerfile +++ b/tools/devctr/Dockerfile @@ -113,7 +113,7 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal --default-too && rustup target add x86_64-unknown-linux-musl \ && rustup target add aarch64-unknown-linux-musl \ && rustup component add llvm-tools-preview clippy rustfmt \ - && cargo install --locked cargo-audit cargo-deny@0.16.1 grcov cargo-sort cargo-afl \ + && cargo install --locked cargo-audit cargo-deny grcov cargo-sort cargo-afl \ && cargo install --locked kani-verifier && cargo kani setup \ \ && NIGHTLY_TOOLCHAIN=$(rustup toolchain list | grep nightly | tr -d '\n') \ @@ -149,12 +149,18 @@ RUN cd /usr/include/$ARCH-linux-musl \ && ln -s ../asm-generic asm-generic # Install static version of libseccomp -# +# We need to compile from source because +# libseccomp provided by the distribution is not +# compiled with musl-gcc and we need this +# for our musl builds. +# We specify the tag in order to have a fixed version +# of the library. RUN apt-get update \ && apt-get -y install \ libtool gperf \ && git clone https://github.com/seccomp/libseccomp /tmp/libseccomp \ && cd /tmp/libseccomp \ + && git checkout tags/v2.5.5 \ && ./autogen.sh \ && CC="musl-gcc -static" ./configure --enable-static=yes --enable-shared=false \ && make install \ diff --git a/tools/devtool b/tools/devtool index 9e71cc6191f..3c4e8834d52 100755 --- a/tools/devtool +++ b/tools/devtool @@ -68,7 +68,7 @@ DEVCTR_IMAGE_NO_TAG="public.ecr.aws/firecracker/fcuvm" # Development container tag -DEVCTR_IMAGE_TAG=${DEVCTR_IMAGE_TAG:-v76} +DEVCTR_IMAGE_TAG=${DEVCTR_IMAGE_TAG:-v77} # Development container image (name:tag) # This should be updated whenever we upgrade the development container. 
From 5925eb294d54a36f6172648f1ed386649387fd96 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 8 Jan 2025 10:52:40 +0000 Subject: [PATCH 17/78] chore: allow seccomp rules integration tests to run on aarch64 Remove skip for test_redundant_seccomp_rules for aarch64. Signed-off-by: Egor Lazarchuk --- .../build/test_seccomp_no_redundant_rules.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration_tests/build/test_seccomp_no_redundant_rules.py b/tests/integration_tests/build/test_seccomp_no_redundant_rules.py index 33c8764e512..a61b86b5547 100644 --- a/tests/integration_tests/build/test_seccomp_no_redundant_rules.py +++ b/tests/integration_tests/build/test_seccomp_no_redundant_rules.py @@ -5,8 +5,6 @@ import platform from pathlib import Path -import pytest - from framework import utils from framework.static_analysis import ( determine_unneeded_seccomp_rules, @@ -15,10 +13,6 @@ ) -@pytest.mark.skipif( - platform.machine() != "x86_64", - reason="aarch64 nightly toolchain does not support flags needed to compile analyzable binary yet", -) def test_redundant_seccomp_rules(): """Test that fails if static analysis determines redundant seccomp rules""" arch = platform.processor() From 8d2ab8ee71b0e0fb8e5d4bd1599a9c5dd620c157 Mon Sep 17 00:00:00 2001 From: longxiangqiao Date: Tue, 5 Nov 2024 18:50:30 +0800 Subject: [PATCH 18/78] chore: remove redundant words in comment Fix word duplication in the comment. 
Co-Authored-By: longxiangqiao Signed-off-by: Egor Lazarchuk --- src/vmm/src/devices/virtio/vsock/packet.rs | 2 +- src/vmm/src/dumbo/tcp/connection.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vmm/src/devices/virtio/vsock/packet.rs b/src/vmm/src/devices/virtio/vsock/packet.rs index f19723d25cb..fb79c76292f 100644 --- a/src/vmm/src/devices/virtio/vsock/packet.rs +++ b/src/vmm/src/devices/virtio/vsock/packet.rs @@ -518,7 +518,7 @@ mod tests { )) } - // Test case: the buffer descriptor cannot fit all the data advertised by the the + // Test case: the buffer descriptor cannot fit all the data advertised by the // packet header `len` field. { create_context!(test_ctx, handler_ctx); diff --git a/src/vmm/src/dumbo/tcp/connection.rs b/src/vmm/src/dumbo/tcp/connection.rs index 8036f428318..e17be1063af 100644 --- a/src/vmm/src/dumbo/tcp/connection.rs +++ b/src/vmm/src/dumbo/tcp/connection.rs @@ -815,7 +815,7 @@ impl Connection { /// * `mss_reserved` - How much (if anything) of the MSS value has been already used at the /// lower layers (by IP options, for example). This will be zero most of the time. /// * `payload_src` - References a buffer which contains data to send, and also specifies the - /// sequence number associated with the first byte from that that buffer. + /// sequence number associated with the first byte from that buffer. /// * `now` - An opaque timestamp representing the current moment in time. /// /// [`MAX_WINDOW_SIZE`]: ../constant.MAX_WINDOW_SIZE.html From bda67dfbfa5203ae38aee6598c5b49c32fd853ab Mon Sep 17 00:00:00 2001 From: Steven Wirges Date: Fri, 25 Oct 2024 13:33:48 +0200 Subject: [PATCH 19/78] chore: remove invalid trailing comma from the documentation Remove invalid trailing comma from a json body of a request in the documentation. 
Co-Authored-By: Steven Wirges Signed-off-by: Egor Lazarchuk --- docs/snapshotting/snapshot-support.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/snapshotting/snapshot-support.md b/docs/snapshotting/snapshot-support.md index c8d964ae89b..910dde94ce7 100644 --- a/docs/snapshotting/snapshot-support.md +++ b/docs/snapshotting/snapshot-support.md @@ -266,7 +266,7 @@ curl --unix-socket /tmp/firecracker.socket -i \ -d '{ "snapshot_type": "Full", "snapshot_path": "./snapshot_file", - "mem_file_path": "./mem_file", + "mem_file_path": "./mem_file" }' ``` From 03d3565a1cac7cb459f49b258e410e987c49f0ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 30 Oct 2023 11:31:10 +0100 Subject: [PATCH 20/78] tests: re-use network namespaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-using namespaces saves a whole minute of the test run: before: 302 passed, 25 skipped in 303.29s (0:05:03) after: 301 passed, 25 skipped in 241.77s (0:04:01) Signed-off-by: Pablo Barbáchano --- tests/conftest.py | 51 ++++++++++++++++--- tests/framework/microvm.py | 3 +- tests/host_tools/network.py | 41 +++++++++------ .../integration_tests/functional/test_api.py | 38 +++++++------- .../integration_tests/functional/test_net.py | 3 ++ 5 files changed, 92 insertions(+), 44 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fb0fe4d5752..fb1fc0a2a98 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,6 +44,7 @@ static_cpu_templates_params, ) from host_tools.metrics import get_metrics_logger +from host_tools.network import NetNs # This codebase uses Python features available in Python 3.10 or above if sys.version_info < (3, 10): @@ -265,15 +266,46 @@ def uffd_handler_paths(): yield handlers -@pytest.fixture() -def microvm_factory(request, record_property, results_dir): - """Fixture to create microvms simply. 
+@pytest.fixture(scope="session") +def netns_factory(worker_id): + """A network namespace factory - In order to avoid running out of space when instantiating many microvms, - we remove the directory manually when the fixture is destroyed - (that is after every test). - One can comment the removal line, if it helps with debugging. + Network namespaces are created once per test session and re-used in subsequent tests. """ + # pylint:disable=protected-access + + class NetNsFactory: + """A Network namespace factory that reuses namespaces.""" + + def __init__(self, prefix: str): + self._all = [] + self._returned = [] + self.prefix = prefix + + def get(self, _netns_id): + """Get a free network namespace""" + if len(self._returned) > 0: + ns = self._returned.pop(0) + while ns.is_used(): + pass + return ns + ns = NetNs(self.prefix + str(len(self._all))) + # change the cleanup function so it is returned to the pool + ns._cleanup_orig = ns.cleanup + ns.cleanup = lambda: self._returned.append(ns) + self._all.append(ns) + return ns + + netns_fcty = NetNsFactory(f"netns-{worker_id}-") + yield netns_fcty.get + + for netns in netns_fcty._all: + netns._cleanup_orig() + + +@pytest.fixture() +def microvm_factory(request, record_property, results_dir, netns_factory): + """Fixture to create microvms simply.""" if binary_dir := request.config.getoption("--binary-dir"): fc_binary_path = Path(binary_dir) / "firecracker" @@ -298,7 +330,10 @@ def microvm_factory(request, record_property, results_dir): # We could override the chroot base like so # jailer_kwargs={"chroot_base": "/srv/jailo"} uvm_factory = MicroVMFactory( - fc_binary_path, jailer_binary_path, custom_cpu_template=custom_cpu_template + fc_binary_path, + jailer_binary_path, + netns_factory=netns_factory, + custom_cpu_template=custom_cpu_template, ) yield uvm_factory diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 0903b689dfa..278cb9ecd60 100644 --- a/tests/framework/microvm.py +++ 
b/tests/framework/microvm.py @@ -1074,6 +1074,7 @@ def __init__(self, fc_binary_path: Path, jailer_binary_path: Path, **kwargs): self.vms = [] self.fc_binary_path = Path(fc_binary_path) self.jailer_binary_path = Path(jailer_binary_path) + self.netns_factory = kwargs.pop("netns_factory", net_tools.NetNs) self.kwargs = kwargs def build(self, kernel=None, rootfs=None, **kwargs): @@ -1086,7 +1087,7 @@ def build(self, kernel=None, rootfs=None, **kwargs): jailer_binary_path=kwargs.pop( "jailer_binary_path", self.jailer_binary_path ), - netns=kwargs.pop("netns", net_tools.NetNs(microvm_id)), + netns=kwargs.pop("netns", self.netns_factory(microvm_id)), **kwargs, ) vm.netns.setup() diff --git a/tests/host_tools/network.py b/tests/host_tools/network.py index af0b3766ee0..93cdb323c50 100644 --- a/tests/host_tools/network.py +++ b/tests/host_tools/network.py @@ -251,15 +251,13 @@ def __init__(self, name, netns, ip=None): It also creates a new tap device, brings it up and moves the interface to the specified namespace. 
""" - # Avoid a conflict if two tests want to create the same tap device tap0 - # in the host before moving it into its own netns - temp_name = "tap" + random_str(k=8) - utils.check_output(f"ip tuntap add mode tap name {temp_name}") - utils.check_output(f"ip link set {temp_name} name {name} netns {netns}") - if ip: - utils.check_output(f"ip netns exec {netns} ifconfig {name} {ip} up") self._name = name self._netns = netns + # Create the tap device tap0 directly in the network namespace to avoid + # conflicts + self.netns.check_output(f"ip tuntap add mode tap name {name}") + if ip: + self.netns.check_output(f"ifconfig {name} {ip} up") @property def name(self): @@ -273,14 +271,10 @@ def netns(self): def set_tx_queue_len(self, tx_queue_len): """Set the length of the tap's TX queue.""" - utils.check_output( - "ip netns exec {} ip link set {} txqueuelen {}".format( - self.netns, self.name, tx_queue_len - ) - ) + self.netns.check_output(f"ip link set {self.name} txqueuelen {tx_queue_len}") def __repr__(self): - return f"" + return f"" @dataclass(frozen=True, repr=True) @@ -315,7 +309,7 @@ def with_id(i, netmask_len=30): ) -@dataclass(frozen=True, repr=True) +@dataclass(repr=True) class NetNs: """Defines a network namespace.""" @@ -334,6 +328,10 @@ def cmd_prefix(self): """Return the jailer context netns file prefix.""" return f"ip netns exec {self.id}" + def check_output(self, cmd: str): + """Run a command inside the netns.""" + return utils.check_output(f"{self.cmd_prefix()} {cmd}") + def setup(self): """Set up this network namespace.""" if not self.path.exists(): @@ -350,6 +348,19 @@ def add_tap(self, name, ip): We assume that a Tap is always configured with the same IP. """ if name not in self.taps: - tap = Tap(name, self.id, ip) + tap = Tap(name, self, ip) self.taps[name] = tap return self.taps[name] + + def is_used(self): + """Are any of the TAPs still in use + + Waits until there's no carrier signal. 
+ Otherwise trying to reuse the TAP may return + `Resource busy (os error 16)` + """ + for tap in self.taps: + _, stdout, _ = self.check_output(f"cat /sys/class/net/{tap}/carrier") + if stdout.strip() != "0": + return True + return False diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index 1e54c7b4fb1..94166374bd3 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -191,15 +191,15 @@ def test_net_api_put_update_pre_boot(uvm_plain): test_microvm = uvm_plain test_microvm.spawn() - first_if_name = "first_tap" - tap1 = net_tools.Tap(first_if_name, test_microvm.netns.id) + tap1name = test_microvm.id[:8] + "tap1" + tap1 = net_tools.Tap(tap1name, test_microvm.netns) test_microvm.api.network.put( iface_id="1", guest_mac="06:00:00:00:00:01", host_dev_name=tap1.name ) # Adding new network interfaces is allowed. - second_if_name = "second_tap" - tap2 = net_tools.Tap(second_if_name, test_microvm.netns.id) + tap2name = test_microvm.id[:8] + "tap2" + tap2 = net_tools.Tap(tap2name, test_microvm.netns) test_microvm.api.network.put( iface_id="2", guest_mac="07:00:00:00:00:01", host_dev_name=tap2.name ) @@ -209,28 +209,26 @@ def test_net_api_put_update_pre_boot(uvm_plain): expected_msg = f"The MAC address is already in use: {guest_mac}" with pytest.raises(RuntimeError, match=expected_msg): test_microvm.api.network.put( - iface_id="2", host_dev_name=second_if_name, guest_mac=guest_mac + iface_id="2", host_dev_name=tap2name, guest_mac=guest_mac ) # Updates to a network interface with an available MAC are allowed. test_microvm.api.network.put( - iface_id="2", host_dev_name=second_if_name, guest_mac="08:00:00:00:00:01" + iface_id="2", host_dev_name=tap2name, guest_mac="08:00:00:00:00:01" ) # Updates to a network interface with an unavailable name are not allowed. 
expected_msg = "Could not create the network device" with pytest.raises(RuntimeError, match=expected_msg): test_microvm.api.network.put( - iface_id="1", host_dev_name=second_if_name, guest_mac="06:00:00:00:00:01" + iface_id="1", host_dev_name=tap2name, guest_mac="06:00:00:00:00:01" ) # Updates to a network interface with an available name are allowed. - iface_id = "1" - tapname = test_microvm.id[:8] + "tap" + iface_id - - tap3 = net_tools.Tap(tapname, test_microvm.netns.id) + tap3name = test_microvm.id[:8] + "tap3" + tap3 = net_tools.Tap(tap3name, test_microvm.netns) test_microvm.api.network.put( - iface_id=iface_id, host_dev_name=tap3.name, guest_mac="06:00:00:00:00:01" + iface_id="3", host_dev_name=tap3.name, guest_mac="06:00:00:00:00:01" ) @@ -266,7 +264,7 @@ def test_api_mmds_config(uvm_plain): test_microvm.api.mmds_config.put(network_interfaces=["foo"]) # Attach network interface. - tap = net_tools.Tap("tap1", test_microvm.netns.id) + tap = net_tools.Tap(f"tap1-{test_microvm.id[:6]}", test_microvm.netns) test_microvm.api.network.put( iface_id="1", guest_mac="06:00:00:00:00:01", host_dev_name=tap.name ) @@ -487,7 +485,7 @@ def test_api_put_update_post_boot(uvm_plain, io_engine): iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" @@ -595,7 +593,7 @@ def test_rate_limiters_api_config(uvm_plain, io_engine): # Test network with tx bw rate-limiting. iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, @@ -607,7 +605,7 @@ def test_rate_limiters_api_config(uvm_plain, io_engine): # Test network with rx bw rate-limiting. 
iface_id = "2" tapname = test_microvm.id[:8] + "tap" + iface_id - tap2 = net_tools.Tap(tapname, test_microvm.netns.id) + tap2 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, guest_mac="06:00:00:00:00:02", @@ -618,7 +616,7 @@ def test_rate_limiters_api_config(uvm_plain, io_engine): # Test network with tx and rx bw and ops rate-limiting. iface_id = "3" tapname = test_microvm.id[:8] + "tap" + iface_id - tap3 = net_tools.Tap(tapname, test_microvm.netns.id) + tap3 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, guest_mac="06:00:00:00:00:03", @@ -665,7 +663,7 @@ def test_api_patch_pre_boot(uvm_plain, io_engine): iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" ) @@ -714,7 +712,7 @@ def test_negative_api_patch_post_boot(uvm_plain, io_engine): iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" ) @@ -1245,7 +1243,7 @@ def test_get_full_config(uvm_plain): # Add a net device. 
iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id - tap1 = net_tools.Tap(tapname, test_microvm.netns.id) + tap1 = net_tools.Tap(tapname, test_microvm.netns) guest_mac = "06:00:00:00:00:01" tx_rl = { "bandwidth": {"size": 1000000, "refill_time": 100, "one_time_burst": None}, diff --git a/tests/integration_tests/functional/test_net.py b/tests/integration_tests/functional/test_net.py index 7b784e453c5..a01157dcf76 100644 --- a/tests/integration_tests/functional/test_net.py +++ b/tests/integration_tests/functional/test_net.py @@ -83,6 +83,9 @@ def test_multi_queue_unsupported(uvm_plain): guest_mac="AA:FC:00:00:00:01", ) + # clean TAP device + utils.run_cmd(f"{microvm.netns.cmd_prefix()} ip link del name {tapname}") + @pytest.fixture def uvm_any(microvm_factory, uvm_ctor, guest_kernel, rootfs): From 9338d79b59ddd2e88a1172e46d555d3804410d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 20 Nov 2023 10:55:16 +0100 Subject: [PATCH 21/78] ci: increase functional test concurrency from 8 to 16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that there are no bottlenecks we can increase the concurrency of the tests. This speeds up functional test runs by 20-30% in average. 
| host | before (s) | after (s) | change | |----------|------------|-----------|--------| | m7g 5.10 | 195 | 161 | -18% | | m7g 6.1 | 225 | 172 | -24% | | m5n 5.10 | 466 | 274 | -40% | | m5n 6.1 | 440 | 291 | -33% | Signed-off-by: Pablo Barbáchano --- .buildkite/pipeline_cross.py | 2 +- .buildkite/pipeline_pr.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/pipeline_cross.py b/.buildkite/pipeline_cross.py index d7cd261a3d0..1bac8c4bc3b 100755 --- a/.buildkite/pipeline_cross.py +++ b/.buildkite/pipeline_cross.py @@ -85,7 +85,7 @@ f"buildkite-agent artifact download snapshots/{src_instance}_{src_kv}.tar .", f"tar xSvf snapshots/{src_instance}_{src_kv}.tar", *pipeline.devtool_test( - pytest_opts=f"-m nonci -n4 {k_val} integration_tests/functional/test_snapshot_restore_cross_kernel.py", + pytest_opts=f"-m nonci -n8 --dist worksteal {k_val} integration_tests/functional/test_snapshot_restore_cross_kernel.py", ), ], "label": f"🎬 {src_instance} {src_kv} ➡️ {dst_instance} {dst_kv}", diff --git a/.buildkite/pipeline_pr.py b/.buildkite/pipeline_pr.py index 618aa17860b..5b4693f51bd 100755 --- a/.buildkite/pipeline_pr.py +++ b/.buildkite/pipeline_pr.py @@ -76,7 +76,7 @@ pipeline.build_group( "⚙ Functional and security 🔒", pipeline.devtool_test( - pytest_opts="-n 8 --dist worksteal integration_tests/{{functional,security}}", + pytest_opts="-n 16 --dist worksteal integration_tests/{{functional,security}}", ), ) From 7e1f3d811fa9dfa79c2bbd9cefc71602bf101676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Thu, 19 Dec 2024 11:05:59 +0100 Subject: [PATCH 22/78] tests: clean up conftest.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused import Signed-off-by: Pablo Barbáchano --- tests/conftest.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fb1fc0a2a98..8c81714f716 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,6 @@ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 -# We import some fixtures that are unused. Disable that too. -# pylint:disable=unused-import - """Imported by pytest at the start of every test session. # Fixture Goals @@ -25,12 +22,10 @@ import inspect import json import os -import re import shutil import sys import tempfile from pathlib import Path -from typing import Dict import pytest @@ -57,7 +52,7 @@ METRICS = get_metrics_logger() -PHASE_REPORT_KEY = pytest.StashKey[Dict[str, pytest.CollectReport]]() +PHASE_REPORT_KEY = pytest.StashKey[dict[str, pytest.CollectReport]]() def pytest_addoption(parser): @@ -456,19 +451,13 @@ def rootfs_rw(): @pytest.fixture def uvm_plain(microvm_factory, guest_kernel_linux_5_10, rootfs): - """Create a vanilla VM, non-parametrized - kernel: 5.10 - rootfs: Ubuntu 24.04 - """ + """Create a vanilla VM, non-parametrized""" return microvm_factory.build(guest_kernel_linux_5_10, rootfs) @pytest.fixture def uvm_plain_rw(microvm_factory, guest_kernel_linux_5_10, rootfs_rw): - """Create a vanilla VM, non-parametrized - kernel: 5.10 - rootfs: Ubuntu 24.04 - """ + """Create a vanilla VM, non-parametrized""" return microvm_factory.build(guest_kernel_linux_5_10, rootfs_rw) From 4969e47d56f5a524c8604f1447fcab6458ac3d49 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 8 Jan 2025 16:40:59 +0000 Subject: [PATCH 23/78] test: unpin m6i/5.10 AMI for perf tests With C-states re-enabled by f610cae59a5d ("fix: stop messing with C-states in performance tests"), the m6i volatility will be gone. 
Signed-off-by: Patrick Roy --- .buildkite/pipeline_perf.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.buildkite/pipeline_perf.py b/.buildkite/pipeline_perf.py index e8169bfb2cd..a94c13b8af0 100755 --- a/.buildkite/pipeline_perf.py +++ b/.buildkite/pipeline_perf.py @@ -121,10 +121,7 @@ # } # will pin steps running on instances "m6i.metal" with kernel version tagged "linux_6.1" # to a new kernel version tagged "linux_6.1-pinned" -pins = { - # TODO: Unpin when performance instability on m6i/5.10 has gone. - "linux_5.10-pinned": {"instance": "m6i.metal", "kv": "linux_5.10"}, -} +pins = {} def apply_pins(steps): From 0e36ec47a0f94bff76677daae8e35f411d1b4bb6 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Wed, 11 Dec 2024 16:57:28 +0000 Subject: [PATCH 24/78] Use 0ms delay on latency tests Using the larger delay appears to have negative side effects on AMD Signed-off-by: Jack Thomson --- tests/integration_tests/performance/test_network_ab.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/performance/test_network_ab.py b/tests/integration_tests/performance/test_network_ab.py index 60c4f17361e..3a50d864544 100644 --- a/tests/integration_tests/performance/test_network_ab.py +++ b/tests/integration_tests/performance/test_network_ab.py @@ -62,7 +62,7 @@ def test_network_latency(network_microvm, metrics): rounds = 15 request_per_round = 30 - delay = 0.2 + delay = 0.0 metrics.set_dimensions( { From a5ffb7a70ce19cb75612e0a5b402ed9438838c08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Thu, 9 Jan 2025 20:10:56 +0100 Subject: [PATCH 25/78] tests: add support for amazonlinux:2023 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test AL2023 in the popular containers test. Had to rework the build rootfs script to account for some small AL2023 differences. 
Signed-off-by: Pablo Barbáchano --- tools/test-popular-containers/build_rootfs.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/test-popular-containers/build_rootfs.sh b/tools/test-popular-containers/build_rootfs.sh index 501197949f0..7f5eb5fbfdf 100755 --- a/tools/test-popular-containers/build_rootfs.sh +++ b/tools/test-popular-containers/build_rootfs.sh @@ -48,7 +48,6 @@ function make_rootfs { systemd-nspawn --timezone=off --pipe -i $IMG /bin/sh <>/etc/inittab ;; +amzn) + dnf update + dnf install -y openssh-server iproute passwd + # re-do this + ln -svf /etc/systemd/system/fcnet.service /etc/systemd/system/sysinit.target.wants/fcnet.service + rm -fv /etc/systemd/system/getty.target.wants/getty@tty1.service + ;; esac +passwd -d root EOF } @@ -70,3 +77,4 @@ make_rootfs ubuntu:22.04 make_rootfs ubuntu:24.04 make_rootfs ubuntu:24.10 # make_rootfs ubuntu:latest +make_rootfs amazonlinux:2023 From a7fb815f915e66316f7a0b714eb0d06edf6ebb30 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Mon, 13 Jan 2025 13:44:15 +0000 Subject: [PATCH 26/78] chore: mark Firecracker 1.8 unsupported Firecracker v1.8 had reached EOL on 2025-01-10. 
Signed-off-by: Egor Lazarchuk --- docs/RELEASE_POLICY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/RELEASE_POLICY.md b/docs/RELEASE_POLICY.md index fe2f881d6b6..3b6b8308d45 100644 --- a/docs/RELEASE_POLICY.md +++ b/docs/RELEASE_POLICY.md @@ -92,7 +92,7 @@ v3.1 will be patched since were the last two Firecracker releases and less than | ------: | -----------: | -----------: | ------------------: | :------------------------------ | | v1.10 | 2024-11-07 | v1.10.1 | 2025-05-07 | Supported | | v1.9 | 2024-09-02 | v1.9.1 | 2025-03-02 | Supported | -| v1.8 | 2024-07-10 | v1.8.0 | 2025-01-10 | Supported | +| v1.8 | 2024-07-10 | v1.8.0 | 2025-01-10 | 2025-01-10 (end of 6mo support) | | v1.7 | 2024-03-18 | v1.7.0 | 2024-09-18 | 2024-09-18 (end of 6mo support) | | v1.6 | 2023-12-20 | v1.6.0 | 2024-06-20 | 2024-07-10 (v1.8 released) | | v1.5 | 2023-10-09 | v1.5.1 | 2024-04-09 | 2024-04-09 (end of 6mo support) | From 94826a8d7de32fc871945b7ee088f1f7d3c8a4cc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:07:53 +0000 Subject: [PATCH 27/78] build(deps): Bump the firecracker group with 15 updates Bumps the firecracker group with 15 updates: | Package | From | To | | --- | --- | --- | | [thiserror](https://github.com/dtolnay/thiserror) | `2.0.9` | `2.0.11` | | [clap](https://github.com/clap-rs/clap) | `4.5.23` | `4.5.26` | | [proc-macro2](https://github.com/dtolnay/proc-macro2) | `1.0.92` | `1.0.93` | | [syn](https://github.com/dtolnay/syn) | `2.0.95` | `2.0.96` | | [uuid](https://github.com/uuid-rs/uuid) | `1.11.0` | `1.11.1` | | [serde_json](https://github.com/serde-rs/json) | `1.0.134` | `1.0.135` | | [bitflags](https://github.com/bitflags/bitflags) | `2.6.0` | `2.7.0` | | [cc](https://github.com/rust-lang/cc-rs) | `1.2.7` | `1.2.9` | | [clap_builder](https://github.com/clap-rs/clap) | `4.5.23` | `4.5.26` | | [clap_derive](https://github.com/clap-rs/clap) | `4.5.18` | 
`4.5.24` | | [linux-raw-sys](https://github.com/sunfishcode/linux-raw-sys) | `0.4.14` | `0.4.15` | | [prettyplease](https://github.com/dtolnay/prettyplease) | `0.2.27` | `0.2.29` | | [rustix](https://github.com/bytecodealliance/rustix) | `0.38.42` | `0.38.43` | | [uuid-macro-internal](https://github.com/uuid-rs/uuid) | `1.11.0` | `1.11.1` | | [winnow](https://github.com/winnow-rs/winnow) | `0.6.22` | `0.6.24` | Updates `thiserror` from 2.0.9 to 2.0.11 - [Release notes](https://github.com/dtolnay/thiserror/releases) - [Commits](https://github.com/dtolnay/thiserror/compare/2.0.9...2.0.11) Updates `clap` from 4.5.23 to 4.5.26 - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.23...clap_complete-v4.5.26) Updates `proc-macro2` from 1.0.92 to 1.0.93 - [Release notes](https://github.com/dtolnay/proc-macro2/releases) - [Commits](https://github.com/dtolnay/proc-macro2/compare/1.0.92...1.0.93) Updates `syn` from 2.0.95 to 2.0.96 - [Release notes](https://github.com/dtolnay/syn/releases) - [Commits](https://github.com/dtolnay/syn/compare/2.0.95...2.0.96) Updates `uuid` from 1.11.0 to 1.11.1 - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.11.0...1.11.1) Updates `serde_json` from 1.0.134 to 1.0.135 - [Release notes](https://github.com/serde-rs/json/releases) - [Commits](https://github.com/serde-rs/json/compare/v1.0.134...v1.0.135) Updates `bitflags` from 2.6.0 to 2.7.0 - [Release notes](https://github.com/bitflags/bitflags/releases) - [Changelog](https://github.com/bitflags/bitflags/blob/main/CHANGELOG.md) - [Commits](https://github.com/bitflags/bitflags/compare/2.6.0...2.7.0) Updates `cc` from 1.2.7 to 1.2.9 - [Release notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - 
[Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.7...cc-v1.2.9) Updates `clap_builder` from 4.5.23 to 4.5.26 - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/v4.5.23...v4.5.26) Updates `clap_derive` from 4.5.18 to 4.5.24 - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/v4.5.18...v4.5.24) Updates `linux-raw-sys` from 0.4.14 to 0.4.15 - [Commits](https://github.com/sunfishcode/linux-raw-sys/compare/v0.4.14...v0.4.15) Updates `prettyplease` from 0.2.27 to 0.2.29 - [Release notes](https://github.com/dtolnay/prettyplease/releases) - [Commits](https://github.com/dtolnay/prettyplease/compare/0.2.27...0.2.29) Updates `rustix` from 0.38.42 to 0.38.43 - [Release notes](https://github.com/bytecodealliance/rustix/releases) - [Changelog](https://github.com/bytecodealliance/rustix/blob/main/CHANGELOG.md) - [Commits](https://github.com/bytecodealliance/rustix/compare/v0.38.42...v0.38.43) Updates `uuid-macro-internal` from 1.11.0 to 1.11.1 - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.11.0...1.11.1) Updates `winnow` from 0.6.22 to 0.6.24 - [Changelog](https://github.com/winnow-rs/winnow/blob/main/CHANGELOG.md) - [Commits](https://github.com/winnow-rs/winnow/compare/v0.6.22...v0.6.24) --- updated-dependencies: - dependency-name: thiserror dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: clap dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: proc-macro2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: syn 
dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: uuid dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: serde_json dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: bitflags dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: cc dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: clap_builder dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: clap_derive dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: linux-raw-sys dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: prettyplease dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: rustix dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: uuid-macro-internal dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: winnow dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 104 +++++++++++++-------------- src/acpi-tables/Cargo.toml | 2 +- src/clippy-tracing/Cargo.toml | 8 +-- src/cpu-template-helper/Cargo.toml | 6 +- src/firecracker/Cargo.toml | 6 +- src/jailer/Cargo.toml | 2 +- src/log-instrument-macros/Cargo.toml | 4 +- src/rebase-snap/Cargo.toml | 2 +- src/seccompiler/Cargo.toml | 4 +- src/snapshot-editor/Cargo.toml | 4 +- src/utils/Cargo.toml | 2 +- src/vmm/Cargo.toml | 6 +- 12 files changed, 75 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8fc1d38d933..3e3ae133038 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,7 +7,7 @@ name = "acpi_tables" version = "0.1.0" dependencies = [ "displaydoc", - "thiserror 2.0.9", + "thiserror 2.0.11", "vm-memory", "zerocopy 0.8.14", ] @@ -187,7 +187,7 @@ version = "0.68.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cexpr", "clang-sys", "lazy_static", @@ -207,7 +207,7 @@ version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cexpr", "clang-sys", "itertools 0.10.5", @@ -232,9 +232,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "1be3f42a67d6d345ecd59f675f3f012d6974981560836e938c22b424b85ce1be" [[package]] name = "byteorder" @@ -260,9 +260,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.7" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" +checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b" dependencies = [ "jobserver", "libc", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.23" +version = "4.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" +checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" dependencies = [ "clap_builder", "clap_derive", @@ -353,9 +353,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.23" +version = "4.5.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" +checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" dependencies = [ "anstream", "anstyle", @@ -365,9 +365,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.18" +version = "4.5.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" dependencies = [ "heck", "proc-macro2", @@ -419,7 +419,7 @@ dependencies = [ "log-instrument", "serde", "serde_json", - "thiserror 2.0.9", + "thiserror 2.0.11", "vmm", "vmm-sys-util", ] @@ -613,7 +613,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "thiserror 2.0.9", + "thiserror 2.0.11", "timerfd", "userfaultfd", "utils", @@ -633,7 +633,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31c683a9f13de31432e6097131d5f385898c7f0635c0f392b9d0fa165063c8ac" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cfg-if", "log", "managed", @@ -798,7 +798,7 @@ dependencies = [ "libc", "log-instrument", "regex", - "thiserror 2.0.9", + "thiserror 2.0.11", "utils", 
"vmm-sys-util", ] @@ -829,7 +829,7 @@ version = "0.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e013ae7fcd2c6a8f384104d16afe7ea02969301ea2bb2a56e44b011ebc907cab" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "kvm-bindings", "libc", "vmm-sys-util", @@ -874,9 +874,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "log" @@ -947,7 +947,7 @@ version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cfg-if", "libc", ] @@ -1024,9 +1024,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.27" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483f8c21f64f3ea09fe0f30f5d48c3e8eefe5dac9129f0075f76593b4c1da705" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", "syn", @@ -1034,9 +1034,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -1047,7 +1047,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "lazy_static", "num-traits", "rand", @@ -1112,7 +1112,7 @@ dependencies = [ "displaydoc", "libc", 
"log-instrument", - "thiserror 2.0.9", + "thiserror 2.0.11", "utils", "vmm-sys-util", ] @@ -1154,11 +1154,11 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.42" +version = "0.38.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" +checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "errno", "libc", "linux-raw-sys", @@ -1190,7 +1190,7 @@ dependencies = [ "log-instrument", "serde", "serde_json", - "thiserror 2.0.9", + "thiserror 2.0.11", "utils", "vmm-sys-util", ] @@ -1226,9 +1226,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.134" +version = "1.0.135" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d" +checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" dependencies = [ "itoa", "memchr", @@ -1270,7 +1270,7 @@ dependencies = [ "libc", "log-instrument", "semver", - "thiserror 2.0.9", + "thiserror 2.0.11", "utils", "vmm", "vmm-sys-util", @@ -1290,9 +1290,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.95" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -1310,11 +1310,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.9" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc" +checksum = 
"d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" dependencies = [ - "thiserror-impl 2.0.9", + "thiserror-impl 2.0.11", ] [[package]] @@ -1330,9 +1330,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.9" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", @@ -1438,7 +1438,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18d8b176d4d3e420685e964f87c25df5fdd5b26d7eb0d0e7c892d771f5b81035" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "cfg-if", "libc", "nix", @@ -1470,14 +1470,14 @@ dependencies = [ "displaydoc", "libc", "log-instrument", - "thiserror 2.0.9", + "thiserror 2.0.11", ] [[package]] name = "uuid" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "b913a3b5fe84142e269d63cc62b64319ccaf89b748fc31fe025177f767a756c4" dependencies = [ "getrandom", "rand", @@ -1486,9 +1486,9 @@ dependencies = [ [[package]] name = "uuid-macro-internal" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b91f57fe13a38d0ce9e28a03463d8d3c2468ed03d75375110ec71d93b449a08" +checksum = "c91084647266237a48351d05d55dee65bba9e1b597f555fcf54680f820284a1c" dependencies = [ "proc-macro2", "quote", @@ -1507,7 +1507,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bce0aad4d8776cb64f1ac591e908a561c50ba6adac4416296efee590b155623f" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.7.0", "libc", "uuid", "vm-memory", @@ -1557,7 +1557,7 @@ dependencies = [ "aws-lc-rs", "base64", "bincode", - "bitflags 2.6.0", + 
"bitflags 2.7.0", "crc64", "criterion", "derive_more", @@ -1581,7 +1581,7 @@ dependencies = [ "serde", "serde_json", "slab", - "thiserror 2.0.9", + "thiserror 2.0.11", "timerfd", "userfaultfd", "utils", @@ -1749,9 +1749,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.22" +version = "0.6.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39281189af81c07ec09db316b302a3e67bf9bd7cbf6c820b50e35fee9c2fa980" +checksum = "c8d71a593cc5c42ad7876e2c1fda56f314f3754c084128833e64f1345ff8a03a" dependencies = [ "memchr", ] diff --git a/src/acpi-tables/Cargo.toml b/src/acpi-tables/Cargo.toml index 24924b0a691..17b5b5baa6f 100644 --- a/src/acpi-tables/Cargo.toml +++ b/src/acpi-tables/Cargo.toml @@ -8,7 +8,7 @@ license = "Apache-2.0" [dependencies] displaydoc = "0.2.5" -thiserror = "2.0.9" +thiserror = "2.0.11" vm-memory = { version = "0.16.1", features = ["backend-mmap", "backend-bitmap"] } zerocopy = { version = "0.8.14", features = ["derive"] } diff --git a/src/clippy-tracing/Cargo.toml b/src/clippy-tracing/Cargo.toml index 09c03b36b29..0594fc3e349 100644 --- a/src/clippy-tracing/Cargo.toml +++ b/src/clippy-tracing/Cargo.toml @@ -10,15 +10,15 @@ name = "clippy-tracing" bench = false [dependencies] -clap = { version = "4.5.23", features = ["derive"] } +clap = { version = "4.5.26", features = ["derive"] } itertools = "0.14.0" -proc-macro2 = { version = "1.0.92", features = ["span-locations"] } +proc-macro2 = { version = "1.0.93", features = ["span-locations"] } quote = "1.0.38" -syn = { version = "2.0.95", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } +syn = { version = "2.0.96", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } walkdir = "2.5.0" [dev-dependencies] -uuid = { version = "1.11.0", features = ["v4"] } +uuid = { version = "1.11.1", features = ["v4"] } [lints] workspace = true diff --git 
a/src/cpu-template-helper/Cargo.toml b/src/cpu-template-helper/Cargo.toml index a2075f6b3f3..6ff36523398 100644 --- a/src/cpu-template-helper/Cargo.toml +++ b/src/cpu-template-helper/Cargo.toml @@ -10,13 +10,13 @@ name = "cpu-template-helper" bench = false [dependencies] -clap = { version = "4.5.23", features = ["derive", "string"] } +clap = { version = "4.5.26", features = ["derive", "string"] } displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } serde = { version = "1.0.217", features = ["derive"] } -serde_json = "1.0.134" -thiserror = "2.0.9" +serde_json = "1.0.135" +thiserror = "2.0.11" vmm = { path = "../vmm" } vmm-sys-util = "0.12.1" diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index 3f9894f0144..55903bf2afe 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -25,8 +25,8 @@ micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } seccompiler = { path = "../seccompiler" } serde = { version = "1.0.217", features = ["derive"] } serde_derive = "1.0.136" -serde_json = "1.0.134" -thiserror = "2.0.9" +serde_json = "1.0.135" +thiserror = "2.0.11" timerfd = "1.6.0" utils = { path = "../utils" } vmm = { path = "../vmm" } @@ -45,7 +45,7 @@ userfaultfd = "0.8.1" bincode = "1.2.1" seccompiler = { path = "../seccompiler" } serde = { version = "1.0.217" } -serde_json = "1.0.134" +serde_json = "1.0.135" [features] tracing = ["log-instrument", "seccompiler/tracing", "utils/tracing", "vmm/tracing"] diff --git a/src/jailer/Cargo.toml b/src/jailer/Cargo.toml index 4c29f53a0e1..be231386020 100644 --- a/src/jailer/Cargo.toml +++ b/src/jailer/Cargo.toml @@ -15,7 +15,7 @@ bench = false libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } regex = { version = "1.11.1", default-features = false, features = ["std"] } -thiserror = "2.0.9" +thiserror = "2.0.11" vmm-sys-util = "0.12.1" utils = { path = "../utils" } diff --git 
a/src/log-instrument-macros/Cargo.toml b/src/log-instrument-macros/Cargo.toml index 549a851a0da..471b4b66159 100644 --- a/src/log-instrument-macros/Cargo.toml +++ b/src/log-instrument-macros/Cargo.toml @@ -11,9 +11,9 @@ proc-macro = true bench = false [dependencies] -proc-macro2 = "1.0.92" +proc-macro2 = "1.0.93" quote = "1.0.38" -syn = { version = "2.0.95", features = ["full", "extra-traits"] } +syn = { version = "2.0.96", features = ["full", "extra-traits"] } [lints] workspace = true diff --git a/src/rebase-snap/Cargo.toml b/src/rebase-snap/Cargo.toml index 10d93be2712..f52933ddaae 100644 --- a/src/rebase-snap/Cargo.toml +++ b/src/rebase-snap/Cargo.toml @@ -13,7 +13,7 @@ bench = false displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } -thiserror = "2.0.9" +thiserror = "2.0.11" vmm-sys-util = "0.12.1" utils = { path = "../utils" } diff --git a/src/seccompiler/Cargo.toml b/src/seccompiler/Cargo.toml index 67197bfb82b..a991d3652d5 100644 --- a/src/seccompiler/Cargo.toml +++ b/src/seccompiler/Cargo.toml @@ -21,8 +21,8 @@ displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } serde = { version = "1.0.217", features = ["derive"] } -serde_json = "1.0.134" -thiserror = "2.0.9" +serde_json = "1.0.135" +thiserror = "2.0.11" utils = { path = "../utils" } diff --git a/src/snapshot-editor/Cargo.toml b/src/snapshot-editor/Cargo.toml index f57268656c4..a9bd3143b86 100644 --- a/src/snapshot-editor/Cargo.toml +++ b/src/snapshot-editor/Cargo.toml @@ -10,14 +10,14 @@ name = "snapshot-editor" bench = false [dependencies] -clap = { version = "4.5.23", features = ["derive", "string"] } +clap = { version = "4.5.26", features = ["derive", "string"] } displaydoc = "0.2.5" fc_utils = { package = "utils", path = "../utils" } libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } semver = "1.0.24" -thiserror = "2.0.9" +thiserror = "2.0.11" vmm = { path = "../vmm" } 
vmm-sys-util = "0.12.1" diff --git a/src/utils/Cargo.toml b/src/utils/Cargo.toml index bf8a5092968..d8b13452089 100644 --- a/src/utils/Cargo.toml +++ b/src/utils/Cargo.toml @@ -12,7 +12,7 @@ bench = false displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } -thiserror = "2.0.9" +thiserror = "2.0.11" [features] tracing = ["log-instrument"] diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index c73b090a2dc..8773198c002 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -15,7 +15,7 @@ arrayvec = { version = "0.7.6", optional = true } aws-lc-rs = { version = "1.12.0", features = ["bindgen"] } base64 = "0.22.1" bincode = "1.2.1" -bitflags = "2.6.0" +bitflags = "2.7.0" crc64 = "2.0.0" derive_more = { version = "1.0.0", default-features = false, features = ["from", "display"] } displaydoc = "0.2.5" @@ -34,9 +34,9 @@ micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } seccompiler = { path = "../seccompiler" } semver = { version = "1.0.24", features = ["serde"] } serde = { version = "1.0.217", features = ["derive", "rc"] } -serde_json = "1.0.134" +serde_json = "1.0.135" slab = "0.4.7" -thiserror = "2.0.9" +thiserror = "2.0.11" timerfd = "1.5.0" userfaultfd = "0.8.1" utils = { path = "../utils" } From f7bceb6186c846bcd338299f3b91dca5d95bb21c Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 16 Jan 2025 07:32:45 +0000 Subject: [PATCH 28/78] devtool: Build A and B binaries if $do_build is true If `cmd_test` is instructed to build its own binaries (e.g. being run outside of CI), and the environment specifies that A/B-tests should be run, then we not only need to build the currently checked out commit (the "B" revision), but also an "A" binary based on the value of the `BUILDKITE_PULL_REQUEST_BASE_BRANCH` environment variable. 
Signed-off-by: Patrick Roy --- tools/devtool | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/devtool b/tools/devtool index 3c4e8834d52..2ab7969484b 100755 --- a/tools/devtool +++ b/tools/devtool @@ -722,7 +722,12 @@ cmd_test() { ensure_devctr ensure_build_dir ensure_ci_artifacts - [ $do_build != 0 ] && cmd_build --release + if [ $do_build != 0 ]; then + cmd_build --release + if [ -n "$BUILDKITE_PULL_REQUEST_BASE_BRANCH" ]; then + cmd_build --release --rev "$BUILDKITE_PULL_REQUEST_BASE_BRANCH" + fi + fi apply_linux_61_tweaks From 9ac6f3ab8db4d790e67b04f1bbca7c6c3c41fa4d Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 16 Jan 2025 07:34:08 +0000 Subject: [PATCH 29/78] doc: Add instruction about running functional A/B-tests locally A quick blurb about which environment variables need to be set to run things like test_vulnerabilities.py locally. Signed-off-by: Patrick Roy --- tests/README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/README.md b/tests/README.md index 91fb2a9aeed..1d7b7075f62 100644 --- a/tests/README.md +++ b/tests/README.md @@ -142,6 +142,21 @@ above when run on a PR will fail iff a newly added dependency has a known open RustSec advisory. If run outside a PR, it will fail if any existing dependency has an open RustSec advisory). +### Functional A/B-Tests + +Firecracker has some functional A/B-tests (for example, in +`test_vulnerabilities.py`), which generally compare the state of the pull +request target branch (e.g. `main`), with the PR head. However, when running +these locally, pytest does not know anything about potential PRs that the commit +the tests are being run on are contained in, and as such cannot do this +A/B-Test. 
To run functional A/B-Tests locally, you need to create a "fake" PR +environment by setting the `BUILDKITE_PULL_REQUEST` and +`BUILDKITE_PULL_REQUEST_BASE_BRANCH` environment variables: + +``` +BUILDKITE_PULL_REQUEST=true BUILDKITE_PULL_REQUEST_BASE_BRANCH=main ./tools/devtool test -- integration_tests/security/test_vulnerabilities.py +``` + ### Performance A/B-Tests Firecracker has a special framework for orchestrating long-running A/B-tests From 14cb346eeaa7a47c57364ebcafde396d01c9e776 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 22 Nov 2024 13:51:33 +0000 Subject: [PATCH 30/78] feat(seccomp): update seccompiler to use libseccomp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit libseccomp provides a better quality compiler for BPF seccomp programs than our current implementation. In our testing it produces BPF code with ~65% fewer instructions, which makes the final binaries smaller, which in turn makes the Firecracker binary smaller because we include them into Firecracker at build time. For this transition we create a minimal set of bindings for `libseccomp` in order to simplify maintenance and avoid adding additional dependencies. The only tricky issue with this transition is the way `ioctl` and other syscalls are checked with libseccomp. It always adds a check for the high bits of the request to be 0. Unfortunately, when we build with `musl`, some syscalls like `ioctl` have upper bits set to 1. Because of this, we replace `Eq` with `MaskedEq` with mask `0x00000000FFFFFFFF` when the argument is 32 bits. This commit also removes the dependency of the firecracker and vmm crates on the seccompiler crate. 
Co-authored-by: Pablo Barbáchano Signed-off-by: Egor Lazarchuk --- Cargo.lock | 7 +- src/cpu-template-helper/src/utils/mod.rs | 2 +- src/firecracker/Cargo.toml | 4 +- src/firecracker/build.rs | 21 +- src/firecracker/examples/seccomp/jailer.rs | 2 +- src/firecracker/examples/seccomp/panic.rs | 2 +- src/firecracker/src/api_server/mod.rs | 6 +- src/firecracker/src/api_server_adapter.rs | 2 +- src/firecracker/src/main.rs | 2 +- src/firecracker/src/seccomp.rs | 5 +- src/seccompiler/Cargo.toml | 13 +- src/seccompiler/build.rs | 6 + src/seccompiler/src/backend.rs | 1814 ------------------ src/seccompiler/src/bin.rs | 40 + src/seccompiler/src/bindings.rs | 171 ++ src/seccompiler/src/common.rs | 25 - src/seccompiler/src/compiler.rs | 540 ------ src/seccompiler/src/lib.rs | 399 ++-- src/seccompiler/src/seccompiler_bin.rs | 578 ------ src/seccompiler/src/syscall_table/aarch64.rs | 308 --- src/seccompiler/src/syscall_table/mod.rs | 66 - src/seccompiler/src/syscall_table/x86_64.rs | 364 ---- src/seccompiler/src/types.rs | 192 ++ src/vmm/Cargo.toml | 1 - src/vmm/src/builder.rs | 8 +- src/vmm/src/lib.rs | 6 +- src/vmm/src/persist.rs | 2 +- src/vmm/src/rpc_interface.rs | 5 +- src/vmm/src/seccomp.rs | 238 +++ src/vmm/src/seccomp_filters.rs | 14 - src/vmm/src/signal_handler.rs | 144 -- src/vmm/src/test_utils/mod.rs | 2 +- src/vmm/src/vstate/vcpu/mod.rs | 6 +- src/vmm/tests/integration_tests.rs | 2 +- 34 files changed, 835 insertions(+), 4162 deletions(-) create mode 100644 src/seccompiler/build.rs delete mode 100644 src/seccompiler/src/backend.rs create mode 100644 src/seccompiler/src/bin.rs create mode 100644 src/seccompiler/src/bindings.rs delete mode 100644 src/seccompiler/src/common.rs delete mode 100644 src/seccompiler/src/compiler.rs delete mode 100644 src/seccompiler/src/seccompiler_bin.rs delete mode 100644 src/seccompiler/src/syscall_table/aarch64.rs delete mode 100644 src/seccompiler/src/syscall_table/mod.rs delete mode 100644 src/seccompiler/src/syscall_table/x86_64.rs 
create mode 100644 src/seccompiler/src/types.rs create mode 100644 src/vmm/src/seccomp.rs delete mode 100644 src/vmm/src/seccomp_filters.rs diff --git a/Cargo.lock b/Cargo.lock index 3e3ae133038..1e9a4972b07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -601,7 +601,6 @@ dependencies = [ name = "firecracker" version = "1.11.0-dev" dependencies = [ - "bincode", "cargo_toml", "displaydoc", "event-manager", @@ -1185,14 +1184,13 @@ name = "seccompiler" version = "1.11.0-dev" dependencies = [ "bincode", + "clap", "displaydoc", "libc", - "log-instrument", "serde", "serde_json", "thiserror 2.0.11", - "utils", - "vmm-sys-util", + "zerocopy 0.8.14", ] [[package]] @@ -1576,7 +1574,6 @@ dependencies = [ "memfd", "micro_http", "proptest", - "seccompiler", "semver", "serde", "serde_json", diff --git a/src/cpu-template-helper/src/utils/mod.rs b/src/cpu-template-helper/src/utils/mod.rs index bd570840fc5..b6d3465efd5 100644 --- a/src/cpu-template-helper/src/utils/mod.rs +++ b/src/cpu-template-helper/src/utils/mod.rs @@ -12,7 +12,7 @@ use std::sync::{Arc, Mutex}; use vmm::builder::{build_microvm_for_boot, StartMicrovmError}; use vmm::cpu_config::templates::{CustomCpuTemplate, Numeric}; use vmm::resources::VmResources; -use vmm::seccomp_filters::get_empty_filters; +use vmm::seccomp::get_empty_filters; use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; use vmm::{EventManager, Vmm, HTTP_MAX_PAYLOAD_SIZE}; use vmm_sys_util::tempfile::TempFile; diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index 55903bf2afe..e6af962ea20 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -22,7 +22,6 @@ libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } -seccompiler = { path = "../seccompiler" } serde = { version = "1.0.217", features = ["derive"] } serde_derive = "1.0.136" serde_json = "1.0.135" @@ -42,13 +41,12 @@ serde = { version = "1.0.217", 
features = ["derive"] } userfaultfd = "0.8.1" [build-dependencies] -bincode = "1.2.1" seccompiler = { path = "../seccompiler" } serde = { version = "1.0.217" } serde_json = "1.0.135" [features] -tracing = ["log-instrument", "seccompiler/tracing", "utils/tracing", "vmm/tracing"] +tracing = ["log-instrument", "utils/tracing", "vmm/tracing"] gdb = ["vmm/gdb"] [lints] diff --git a/src/firecracker/build.rs b/src/firecracker/build.rs index b20e1cd4e1e..87710b54fc4 100644 --- a/src/firecracker/build.rs +++ b/src/firecracker/build.rs @@ -1,13 +1,8 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -use std::collections::BTreeMap; -use std::fs::File; use std::path::Path; -use seccompiler::common::BpfProgram; -use seccompiler::compiler::{Compiler, JsonFile}; - const ADVANCED_BINARY_FILTER_FILE_NAME: &str = "seccomp_filter.bpf"; const JSON_DIR: &str = "../../resources/seccomp"; @@ -44,19 +39,7 @@ fn main() { // Also retrigger the build script on any seccompiler source code change. 
println!("cargo:rerun-if-changed={}", SECCOMPILER_SRC_DIR); - let input = std::fs::read_to_string(seccomp_json_path).expect("Correct input file"); - let filters: JsonFile = serde_json::from_str(&input).expect("Input read"); - - let arch = target_arch.as_str().try_into().expect("Target"); - let compiler = Compiler::new(arch); - - // transform the IR into a Map of BPFPrograms - let bpf_data: BTreeMap = compiler - .compile_blob(filters.0, false) - .expect("Successfull compilation"); - - // serialize the BPF programs & output them to a file let out_path = format!("{}/{}", out_dir, ADVANCED_BINARY_FILTER_FILE_NAME); - let output_file = File::create(out_path).expect("Create seccompiler output path"); - bincode::serialize_into(output_file, &bpf_data).expect("Seccompiler serialization"); + seccompiler::compile_bpf(&seccomp_json_path, &target_arch, &out_path, false) + .expect("Cannot compile seccomp filters"); } diff --git a/src/firecracker/examples/seccomp/jailer.rs b/src/firecracker/examples/seccomp/jailer.rs index f82e3f5e249..47f4a667749 100644 --- a/src/firecracker/examples/seccomp/jailer.rs +++ b/src/firecracker/examples/seccomp/jailer.rs @@ -5,7 +5,7 @@ use std::fs::File; use std::os::unix::process::CommandExt; use std::process::{Command, Stdio}; -use seccompiler::{apply_filter, deserialize_binary}; +use vmm::seccomp::{apply_filter, deserialize_binary}; fn main() { let args: Vec = args().collect(); diff --git a/src/firecracker/examples/seccomp/panic.rs b/src/firecracker/examples/seccomp/panic.rs index 7998552a4d1..315899872f4 100644 --- a/src/firecracker/examples/seccomp/panic.rs +++ b/src/firecracker/examples/seccomp/panic.rs @@ -3,7 +3,7 @@ use std::env::args; use std::fs::File; -use seccompiler::{apply_filter, deserialize_binary}; +use vmm::seccomp::{apply_filter, deserialize_binary}; fn main() { let args: Vec = args().collect(); diff --git a/src/firecracker/src/api_server/mod.rs b/src/firecracker/src/api_server/mod.rs index 6ac2955af8f..a2edce205cd 100644 --- 
a/src/firecracker/src/api_server/mod.rs +++ b/src/firecracker/src/api_server/mod.rs @@ -14,13 +14,13 @@ use std::sync::mpsc; pub use micro_http::{Body, HttpServer, Request, Response, ServerError, StatusCode, Version}; use parsed_request::{ParsedRequest, RequestAction}; -use seccompiler::BpfProgramRef; use serde_json::json; use utils::time::{get_time_us, ClockType}; use vmm::logger::{ debug, error, info, update_metric_with_elapsed_time, warn, ProcessTimeReporter, METRICS, }; use vmm::rpc_interface::{ApiRequest, ApiResponse, VmmAction}; +use vmm::seccomp::BpfProgramRef; use vmm::vmm_config::snapshot::SnapshotType; use vmm_sys_util::eventfd::EventFd; @@ -78,7 +78,7 @@ impl ApiServer { // Load seccomp filters on the API thread. // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. - if let Err(err) = seccompiler::apply_filter(seccomp_filter) { + if let Err(err) = vmm::seccomp::apply_filter(seccomp_filter) { panic!( "Failed to set the requested seccomp filters on the API thread: {}", err @@ -208,7 +208,7 @@ mod tests { use vmm::builder::StartMicrovmError; use vmm::logger::StoreMetric; use vmm::rpc_interface::{VmmActionError, VmmData}; - use vmm::seccomp_filters::get_empty_filters; + use vmm::seccomp::get_empty_filters; use vmm::vmm_config::instance_info::InstanceInfo; use vmm::vmm_config::snapshot::CreateSnapshotParams; use vmm_sys_util::tempfile::TempFile; diff --git a/src/firecracker/src/api_server_adapter.rs b/src/firecracker/src/api_server_adapter.rs index ffc4732025d..776d03a4daa 100644 --- a/src/firecracker/src/api_server_adapter.rs +++ b/src/firecracker/src/api_server_adapter.rs @@ -8,13 +8,13 @@ use std::sync::{Arc, Mutex}; use std::thread; use event_manager::{EventOps, Events, MutEventSubscriber, SubscriberOps}; -use seccompiler::BpfThreadMap; use vmm::logger::{error, warn, ProcessTimeReporter}; use vmm::resources::VmResources; use vmm::rpc_interface::{ ApiRequest, ApiResponse, 
BuildMicrovmFromRequestsError, PrebootApiController, RuntimeApiController, VmmAction, }; +use vmm::seccomp::BpfThreadMap; use vmm::vmm_config::instance_info::InstanceInfo; use vmm::{EventManager, FcExitCode, Vmm}; use vmm_sys_util::epoll::EventSet; diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index 300afd0ad66..27a9957d448 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -17,7 +17,6 @@ use std::{io, panic}; use api_server_adapter::ApiServerError; use event_manager::SubscriberOps; use seccomp::FilterError; -use seccompiler::BpfThreadMap; use utils::arg_parser::{ArgParser, Argument}; use utils::validators::validate_instance_id; use vmm::arch::host_page_size; @@ -27,6 +26,7 @@ use vmm::logger::{ }; use vmm::persist::SNAPSHOT_VERSION; use vmm::resources::VmResources; +use vmm::seccomp::BpfThreadMap; use vmm::signal_handler::register_signal_handlers; use vmm::snapshot::{Snapshot, SnapshotError}; use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; diff --git a/src/firecracker/src/seccomp.rs b/src/firecracker/src/seccomp.rs index 5794d6498a8..2c7b3ddecd8 100644 --- a/src/firecracker/src/seccomp.rs +++ b/src/firecracker/src/seccomp.rs @@ -5,8 +5,7 @@ use std::fs::File; use std::io::{BufReader, Read}; use std::path::Path; -use seccompiler::{deserialize_binary, BpfThreadMap, DeserializationError}; -use vmm::seccomp_filters::get_empty_filters; +use vmm::seccomp::{deserialize_binary, get_empty_filters, BpfThreadMap, DeserializationError}; const THREAD_CATEGORIES: [&str; 3] = ["vmm", "api", "vcpu"]; @@ -118,7 +117,7 @@ fn filter_thread_categories(map: BpfThreadMap) -> Result Deserialize<'de> for Comment { - fn deserialize(_deserializer: D) -> std::result::Result - where - D: Deserializer<'de>, - { - String::deserialize(_deserializer)?; - - Ok(Comment {}) - } -} - -/// Seccomp filter errors. 
-#[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] -pub enum FilterError { - /// The seccomp rules vector is empty. - EmptyRulesVector, - /// The seccomp filter contains too many BPF instructions. - FilterTooLarge, - /// The seccomp rule contains an invalid argument number. - InvalidArgumentNumber, - /// {0} - Arch(TargetArchError), - /// Syscall {0} has conflicting rules. - ConflictingRules(i64), -} - -/// Supported target architectures. -#[allow(non_camel_case_types)] -#[derive(Debug, PartialEq, Clone, Copy)] -pub enum TargetArch { - /// x86_64 arch - x86_64, - /// aarch64 arch - aarch64, -} - -/// Errors related to target arch. -#[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] -pub enum TargetArchError { - /// Invalid target arch string: {0} - InvalidString(String), -} - -impl TargetArch { - /// Get the arch audit value. - fn get_audit_value(self) -> u32 { - match self { - TargetArch::x86_64 => AUDIT_ARCH_X86_64, - TargetArch::aarch64 => AUDIT_ARCH_AARCH64, - } - } - - /// Get the string representation. - fn to_string(self) -> &'static str { - match self { - TargetArch::x86_64 => "x86_64", - TargetArch::aarch64 => "aarch64", - } - } -} - -impl TryInto for &str { - type Error = TargetArchError; - fn try_into(self) -> std::result::Result { - match self.to_lowercase().as_str() { - "x86_64" => Ok(TargetArch::x86_64), - "aarch64" => Ok(TargetArch::aarch64), - _ => Err(TargetArchError::InvalidString(self.to_string())), - } - } -} - -impl From for &str { - fn from(target_arch: TargetArch) -> Self { - target_arch.to_string() - } -} - -/// Comparison to perform when matching a condition. -#[derive(Clone, Debug, Deserialize, PartialEq)] -#[serde(rename_all = "snake_case")] -pub enum SeccompCmpOp { - /// Argument value is equal to the specified value. - Eq, - /// Argument value is greater than or equal to the specified value. - Ge, - /// Argument value is greater than specified value. 
- Gt, - /// Argument value is less than or equal to the specified value. - Le, - /// Argument value is less than specified value. - Lt, - /// Masked bits of argument value are equal to masked bits of specified value. - MaskedEq(u64), - /// Argument value is not equal to specified value. - Ne, -} - -/// Seccomp argument value length. -#[derive(Clone, Debug, Deserialize, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum SeccompCmpArgLen { - /// Argument value length is 4 bytes. - Dword, - /// Argument value length is 8 bytes. - Qword, -} - -/// Condition that syscall must match in order to satisfy a rule. -#[derive(Clone, Debug, PartialEq, Deserialize)] -#[serde(deny_unknown_fields)] -pub struct SeccompCondition { - /// Index of the argument that is to be compared. - #[serde(rename = "index")] - arg_number: u8, - /// Length of the argument value that is to be compared. - #[serde(rename = "type")] - arg_len: SeccompCmpArgLen, - /// Comparison to perform. - #[serde(rename = "op")] - operator: SeccompCmpOp, - /// The value that will be compared with the argument value. - #[serde(rename = "val")] - value: u64, - /// Optional empty value, represents a `comment` property in the JSON file. - comment: Option, -} - -/// Actions that `seccomp` can apply to process calling a syscall. -#[derive(Clone, Debug, PartialEq, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SeccompAction { - /// Allows syscall. - Allow, - /// Returns from syscall with specified error number. - Errno(u32), - /// Kills calling thread. - KillThread, - /// Kills calling process. - KillProcess, - /// Same as allow but logs call. - Log, - /// Notifies tracing process of the caller with respective number. - Trace(u32), - /// Sends `SIGSYS` to the calling process. - Trap, -} - -/// Rule that `seccomp` attempts to match for a syscall. -/// -/// If all conditions match then rule gets matched. -/// The action of the first rule that matches will be applied to the calling process. 
-/// If no rule matches the default action is applied. -#[derive(Clone, Debug, PartialEq)] -pub struct SeccompRule { - /// Conditions of rule that need to match in order for the rule to get matched. - conditions: Vec, - /// Action applied to calling process if rule gets matched. - action: SeccompAction, -} - -/// Type that associates the syscall number to its SeccompRules. -pub type SeccompRuleMap = BTreeMap>; - -/// Filter containing rules assigned to syscall numbers. -#[derive(Clone, Debug, PartialEq)] -pub struct SeccompFilter { - /// Map of syscall numbers and corresponding rule chains. - rules: SeccompRuleMap, - /// Default action to apply to syscall numbers that do not exist in the hash map. - default_action: SeccompAction, - /// Target architecture of the generated BPF filter. - target_arch: TargetArch, -} - -impl SeccompCondition { - /// Validates the SeccompCondition data - pub fn validate(&self) -> Result<(), FilterError> { - // Checks that the given argument number is valid. - if self.arg_number > ARG_NUMBER_MAX { - return Err(FilterError::InvalidArgumentNumber); - } - - Ok(()) - } - - /// Splits the [`SeccompCondition`] into 32 bit chunks and offsets. - /// - /// Returns most significant half, least significant half of the `value` field of - /// [`SeccompCondition`], as well as the offsets of the most significant and least significant - /// half of the argument specified by `arg_number` relative to `struct seccomp_data` passed to - /// the BPF program by the kernel. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - fn value_segments(&self) -> (u32, u32, u8, u8) { - // Splits the specified value into its most significant and least significant halves. - let (msb, lsb) = ((self.value >> 32) as u32, (self.value & 0xFFFFFFFF) as u32); - - // Offset to the argument specified by `arg_number`. - // Cannot overflow because the value will be at most 16 + 6 * 8 = 64. 
- let arg_offset = SECCOMP_DATA_ARGS_OFFSET + self.arg_number * SECCOMP_DATA_ARG_SIZE; - - // Extracts offsets of most significant and least significant halves of argument. - // Addition cannot overflow because it's at most `arg_offset` + 4 = 68. - let (msb_offset, lsb_offset) = { (arg_offset + SECCOMP_DATA_ARG_SIZE / 2, arg_offset) }; - - (msb, lsb, msb_offset, lsb_offset) - } - - /// Translates the `eq` (equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - /// - /// The jump is performed if the condition fails and thus the current rule does not match so - /// `seccomp` tries to match the next rule by jumping out of the current rule. - /// - /// In case the condition is part of the last rule, the jump offset is to the default action of - /// respective filter. - /// - /// The most significant and least significant halves of the argument value are compared - /// separately since the BPF operand and accumulator are 4 bytes whereas an argument value is 8. - fn into_eq_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `ge` (greater than or equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- fn into_ge_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `gt` (greater than) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_gt_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `le` (less than or equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- fn into_le_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the `lt` (less than) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_lt_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, msb, offset + 3, 0), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the `masked_eq` (masked equal) condition into BPF statements. - /// - /// The `masked_eq` condition is `true` if the result of logical `AND` between the given value - /// and the mask is the value being compared against. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- fn into_masked_eq_bpf(self, offset: u8, mask: u64) -> Vec { - let (_, _, msb_offset, lsb_offset) = self.value_segments(); - let masked_value = self.value & mask; - let (msb, lsb) = ( - (masked_value >> 32) as u32, - (masked_value & 0xFFFFFFFF) as u32, - ); - let (mask_msb, mask_lsb) = ((mask >> 32) as u32, (mask & 0xFFFFFFFF) as u32); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_msb), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, offset + 3), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_STMT(BPF_ALU + BPF_AND + BPF_K, mask_lsb), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, 0, offset), - ]); - bpf - } - - /// Translates the `ne` (not equal) condition into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. - fn into_ne_bpf(self, offset: u8) -> Vec { - let (msb, lsb, msb_offset, lsb_offset) = self.value_segments(); - - let mut bpf = match self.arg_len { - SeccompCmpArgLen::Dword => vec![], - SeccompCmpArgLen::Qword => vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(msb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, msb, 0, 2), - ], - }; - - bpf.append(&mut vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, u32::from(lsb_offset)), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, lsb, offset, 0), - ]); - bpf - } - - /// Translates the [`SeccompCondition`] into BPF statements. - /// - /// # Arguments - /// - /// * `offset` - The given jump offset to the start of the next rule. 
- /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - fn into_bpf(self, offset: u8) -> Vec { - let result = match self.operator { - SeccompCmpOp::Eq => self.into_eq_bpf(offset), - SeccompCmpOp::Ge => self.into_ge_bpf(offset), - SeccompCmpOp::Gt => self.into_gt_bpf(offset), - SeccompCmpOp::Le => self.into_le_bpf(offset), - SeccompCmpOp::Lt => self.into_lt_bpf(offset), - SeccompCmpOp::MaskedEq(mask) => self.into_masked_eq_bpf(offset, mask), - SeccompCmpOp::Ne => self.into_ne_bpf(offset), - }; - - // Verifies that the `CONDITION_MAX_LEN` constant was properly updated. - assert!(result.len() <= CONDITION_MAX_LEN as usize); - - result - } -} - -impl From for u32 { - /// Return codes of the BPF program for each action. - /// - /// # Arguments - /// - /// * `action` - The [`SeccompAction`] that the kernel will take. - /// - /// [`SeccompAction`]: struct.SeccompAction.html - fn from(action: SeccompAction) -> Self { - match action { - SeccompAction::Allow => SECCOMP_RET_ALLOW, - SeccompAction::Errno(x) => SECCOMP_RET_ERRNO | (x & SECCOMP_RET_MASK), - SeccompAction::KillThread => SECCOMP_RET_KILL_THREAD, - SeccompAction::KillProcess => SECCOMP_RET_KILL_PROCESS, - SeccompAction::Log => SECCOMP_RET_LOG, - SeccompAction::Trace(x) => SECCOMP_RET_TRACE | (x & SECCOMP_RET_MASK), - SeccompAction::Trap => SECCOMP_RET_TRAP, - } - } -} - -impl SeccompRule { - /// Creates a new rule. Rules with 0 conditions always match. - /// - /// # Arguments - /// - /// * `conditions` - Vector of [`SeccompCondition`] that the syscall must match. - /// * `action` - Action taken if the syscall matches the conditions. See [`SeccompAction`]. - /// - /// [`SeccompCondition`]: struct.SeccompCondition.html - /// [`SeccompAction`]: struct.SeccompAction.html - pub fn new(conditions: Vec, action: SeccompAction) -> Self { - Self { conditions, action } - } - - /// Appends a condition of the rule to an accumulator. - /// - /// The length of the rule and offset to the next rule are updated. 
- /// - /// # Arguments - /// - /// * `condition` - The condition added to the rule. - /// * `accumulator` - Accumulator of BPF statements that compose the BPF program. - /// * `rule_len` - Number of conditions in the rule. - /// * `offset` - Offset (in number of BPF statements) to the next rule. - fn append_condition( - condition: SeccompCondition, - accumulator: &mut Vec>, - rule_len: &mut usize, - offset: &mut u8, - ) { - // Tries to detect whether prepending the current condition will produce an unjumpable - // offset (since BPF jumps are a maximum of 255 instructions, which is u8::MAX). - if offset.checked_add(CONDITION_MAX_LEN + 1).is_none() { - // If that is the case, three additional helper jumps are prepended and the offset - // is reset to 1. - // - // - The first jump continues the evaluation of the condition chain by jumping to the - // next condition or the action of the rule if the last condition was matched. - // - The second, jumps out of the rule, to the next rule or the default action of the - // filter in case of the last rule in the rule chain of a syscall. - // - The third jumps out of the rule chain of the syscall, to the rule chain of the next - // syscall number to be checked or the default action of the filter in the case of the - // last rule chain. - let helper_jumps = vec![ - BPF_STMT(BPF_JMP + BPF_JA, 2), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(*offset) + 1), - ]; - *rule_len += helper_jumps.len(); - accumulator.push(helper_jumps); - *offset = 1; - } - - let condition = condition.into_bpf(*offset); - *rule_len += condition.len(); - // Safe to unwrap since we checked that condition length is less than `CONDITION_MAX_LEN`. - *offset += u8::try_from(condition.len()).unwrap(); - accumulator.push(condition); - } -} - -impl From for BpfProgram { - /// Translates a rule into BPF statements. 
- /// - /// Each rule starts with 2 jump statements: - /// * The first jump enters the rule, attempting a match. - /// * The second jump points to the end of the rule chain for one syscall, into the rule chain - /// for the next syscall or the default action if the current syscall is the last one. It - /// essentially jumps out of the current rule chain. - fn from(rule: SeccompRule) -> Self { - // Rule is built backwards, last statement is the action of the rule. - // The offset to the next rule is 1. - let mut accumulator = - Vec::with_capacity(rule.conditions.len() * CONDITION_MAX_LEN as usize); - let mut rule_len = 1; - let mut offset = 1; - accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, u32::from(rule.action))]); - - // Conditions are translated into BPF statements and prepended to the rule. - rule.conditions.into_iter().for_each(|condition| { - SeccompRule::append_condition(condition, &mut accumulator, &mut rule_len, &mut offset) - }); - - // The two initial jump statements are prepended to the rule. - let rule_jumps = vec![ - BPF_STMT(BPF_JMP + BPF_JA, 1), - BPF_STMT(BPF_JMP + BPF_JA, u32::from(offset) + 1), - ]; - rule_len += rule_jumps.len(); - accumulator.push(rule_jumps); - - // Finally, builds the translated rule by consuming the accumulator. - let mut result = Vec::with_capacity(rule_len); - accumulator - .into_iter() - .rev() - .for_each(|mut instructions| result.append(&mut instructions)); - - result - } -} - -impl SeccompFilter { - /// Creates a new filter with a set of rules and a default action. - /// - /// # Arguments - /// - /// * `rules` - Map of syscall numbers and the rules that will be applied to each of them. - /// * `default_action` - Action taken for all syscalls that do not match any rule. - /// * `target_arch` - Target architecture of the generated BPF filter. 
- pub fn new( - rules: SeccompRuleMap, - default_action: SeccompAction, - target_arch: &str, - ) -> Result { - let instance = Self { - rules, - default_action, - target_arch: target_arch.try_into().map_err(FilterError::Arch)?, - }; - - instance.validate()?; - - Ok(instance) - } - - /// Performs semantic checks on the SeccompFilter. - fn validate(&self) -> Result<(), FilterError> { - for (syscall_number, syscall_rules) in self.rules.iter() { - // All inserted syscalls must have at least one rule, otherwise BPF code will break. - if syscall_rules.is_empty() { - return Err(FilterError::EmptyRulesVector); - } - - // Now check for conflicting rules. - // Match on the number of empty rules for the given syscall. - // An `empty rule` is a rule that doesn't have any argument checks. - match syscall_rules - .iter() - .filter(|rule| rule.conditions.is_empty()) - .count() - { - // If the syscall has an empty rule, it may only have that rule. - 1 if syscall_rules.len() > 1 => { - return Err(FilterError::ConflictingRules(*syscall_number)); - } - // This syscall only has the one rule, so is valid. - 1 if syscall_rules.len() <= 1 => {} - // The syscall has no empty rules. - 0 => {} - // For a greater than 1 number of empty rules, error out. - _ => { - return Err(FilterError::ConflictingRules(*syscall_number)); - } - } - } - - Ok(()) - } - - /// Appends a chain of rules to an accumulator, updating the length of the filter. - /// - /// # Arguments - /// - /// * `syscall_number` - The syscall to which the rules apply. - /// * `chain` - The chain of rules for the specified syscall. - /// * `default_action` - The action to be taken in none of the rules apply. - /// * `accumulator` - The expanding BPF program. - /// * `filter_len` - The size (in number of BPF statements) of the BPF program. This is limited - /// to 4096. If the limit is exceeded, the filter is invalidated. 
- fn append_syscall_chain( - syscall_number: i64, - chain: Vec, - default_action: u32, - accumulator: &mut Vec>, - filter_len: &mut usize, - ) -> Result<(), FilterError> { - // The rules of the chain are translated into BPF statements. - let chain: Vec<_> = chain.into_iter().map(SeccompRule::into).collect(); - let chain_len: usize = chain.iter().map(std::vec::Vec::len).sum(); - - // The chain starts with a comparison checking the loaded syscall number against the - // syscall number of the chain. - let mut built_syscall = Vec::with_capacity(1 + chain_len + 1); - built_syscall.push(BPF_JUMP( - BPF_JMP + BPF_JEQ + BPF_K, - u32::try_from(syscall_number).unwrap(), - 0, - 1, - )); - - // The rules of the chain are appended. - chain - .into_iter() - .for_each(|mut rule| built_syscall.append(&mut rule)); - - // The default action is appended, if the syscall number comparison matched and then all - // rules fail to match, the default action is reached. - built_syscall.push(BPF_STMT(BPF_RET + BPF_K, default_action)); - - // The chain is appended to the result. - *filter_len += built_syscall.len(); - accumulator.push(built_syscall); - - // BPF programs are limited to 4096 statements. - if *filter_len >= usize::from(BPF_MAX_LEN) { - return Err(FilterError::FilterTooLarge); - } - - Ok(()) - } -} - -impl TryInto for SeccompFilter { - type Error = FilterError; - fn try_into(self) -> Result { - // Initialize the result with the precursory architecture check. - let mut result = VALIDATE_ARCHITECTURE(self.target_arch); - - // If no rules are set up, the filter will always return the default action, - // so let's short-circuit the function. - if self.rules.is_empty() { - result.extend(vec![BPF_STMT( - BPF_RET + BPF_K, - u32::from(self.default_action), - )]); - - return Ok(result); - } - - // The called syscall number is loaded. 
- let mut accumulator = Vec::with_capacity(1); - let mut filter_len = 1; - accumulator.push(EXAMINE_SYSCALL()); - - // Orders syscalls by priority, the highest number represents the highest priority. - let mut iter = self.rules.into_iter(); - - // For each syscall adds its rule chain to the filter. - let default_action = u32::from(self.default_action); - iter.try_for_each(|(syscall_number, chain)| { - SeccompFilter::append_syscall_chain( - syscall_number, - chain, - default_action, - &mut accumulator, - &mut filter_len, - ) - })?; - - // The default action is once again appended, it is reached if all syscall number - // comparisons fail. - filter_len += 1; - accumulator.push(vec![BPF_STMT(BPF_RET + BPF_K, default_action)]); - - // Finally, builds the translated filter by consuming the accumulator. - result.reserve(filter_len); - accumulator - .into_iter() - .for_each(|mut instructions| result.append(&mut instructions)); - - if result.len() >= usize::from(BPF_MAX_LEN) { - return Err(FilterError::FilterTooLarge); - } - - Ok(result) - } -} - -/// Builds a `jump` BPF instruction. -/// -/// # Arguments -/// -/// * `code` - The operation code. -/// * `jt` - The jump offset in case the operation returns `true`. -/// * `jf` - The jump offset in case the operation returns `false`. -/// * `k` - The operand. -#[allow(non_snake_case)] -#[inline(always)] -fn BPF_JUMP(code: u16, k: u32, jt: u8, jf: u8) -> sock_filter { - sock_filter { code, jt, jf, k } -} - -/// Builds a "statement" BPF instruction. -/// -/// # Arguments -/// -/// * `code` - The operation code. -/// * `k` - The operand. -#[allow(non_snake_case)] -#[inline(always)] -fn BPF_STMT(code: u16, k: u32) -> sock_filter { - sock_filter { - code, - jt: 0, - jf: 0, - k, - } -} - -/// Builds a sequence of BPF instructions that validate the underlying architecture. 
-#[allow(non_snake_case)] -#[inline(always)] -fn VALIDATE_ARCHITECTURE(target_arch: TargetArch) -> Vec { - let audit_arch_value = target_arch.get_audit_value(); - vec![ - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, 4), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, audit_arch_value, 1, 0), - BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS), - ] -} - -/// Builds a sequence of BPF instructions that are followed by syscall examination. -#[allow(non_snake_case)] -#[inline(always)] -fn EXAMINE_SYSCALL() -> Vec { - vec![BPF_STMT( - BPF_LD + BPF_W + BPF_ABS, - u32::from(SECCOMP_DATA_NR_OFFSET), - )] -} - -#[cfg(test)] -mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] - use std::env::consts::ARCH; - use std::thread; - - use super::SeccompCmpOp::*; - use super::{SeccompCmpArgLen as ArgLen, SeccompCondition as Cond, *}; - - // BPF structure definition for filter array. - // See /usr/include/linux/filter.h . - #[repr(C)] - struct sock_fprog { - pub len: ::std::os::raw::c_ushort, - pub filter: *const sock_filter, - } - - // Builds the (syscall, rules) tuple for allowing a syscall with certain arguments. - fn allow_syscall_if(syscall_number: i64, rules: Vec) -> (i64, Vec) { - (syscall_number, rules) - } - - impl SeccompCondition { - /// Creates a new `SeccompCondition`. - pub fn new( - arg_number: u8, - arg_len: SeccompCmpArgLen, - operator: SeccompCmpOp, - value: u64, - ) -> Result { - let instance = Self { - arg_number, - arg_len, - operator, - value, - comment: None, - }; - - instance.validate().map(|_| Ok(instance))? - } - } - - // The type of the `req` parameter is different for the `musl` library. This will enable - // successful build for other non-musl libraries. - #[cfg(target_env = "musl")] - type IoctlRequest = i32; - #[cfg(not(target_env = "musl"))] - type IoctlRequest = u64; - - // We use KVM_GET_PIT2 as the second parameter for ioctl syscalls in some unit tests - // because it's a corner case. 
More details - // [here](https://github.com/firecracker-microvm/firecracker/issues/1206) - const KVM_GET_PIT2: u64 = 0x8070_ae9f; - const KVM_GET_PIT2_MSB: u64 = 0x0000_ae9f; - const KVM_GET_PIT2_LSB: u64 = 0x8070_0000; - - const EXTRA_SYSCALLS: [i64; 6] = [ - libc::SYS_rt_sigprocmask, - libc::SYS_sigaltstack, - libc::SYS_munmap, - libc::SYS_exit, - libc::SYS_rt_sigreturn, - libc::SYS_futex, - ]; - - fn install_filter(bpf_filter: BpfProgram) { - unsafe { - { - let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - assert_eq!(rc, 0); - } - let bpf_prog = sock_fprog { - len: u16::try_from(bpf_filter.len()).unwrap(), - filter: bpf_filter.as_ptr(), - }; - let bpf_prog_ptr = &bpf_prog as *const sock_fprog; - { - let rc = libc::prctl( - libc::PR_SET_SECCOMP, - libc::SECCOMP_MODE_FILTER, - bpf_prog_ptr, - ); - assert_eq!(rc, 0); - } - } - } - - fn validate_seccomp_filter( - rules: Vec<(i64, Vec)>, - validation_fn: fn(), - should_fail: bool, - ) { - let failure_code: i32 = 1000; - - let mut rule_map: SeccompRuleMap = rules.into_iter().collect(); - - for syscall in EXTRA_SYSCALLS.iter() { - rule_map - .entry(*syscall) - .or_default() - .append(&mut vec![SeccompRule::new(vec![], SeccompAction::Allow)]); - } - - // Build seccomp filter. - let filter = SeccompFilter::new( - rule_map, - SeccompAction::Errno(u32::try_from(failure_code).unwrap()), - ARCH, - ) - .unwrap(); - - // We need to run the validation inside another thread in order to avoid setting - // the seccomp filter for the entire unit tests process. - let errno = thread::spawn(move || { - // Install the filter. - install_filter(filter.try_into().unwrap()); - - // Call the validation fn. - validation_fn(); - - // Return errno. 
- std::io::Error::last_os_error().raw_os_error().unwrap() - }) - .join() - .unwrap(); - - // In case of a seccomp denial `errno` should be `failure_code` - if should_fail { - assert_eq!(errno, failure_code); - } else { - assert_ne!(errno, failure_code); - } - } - - #[test] - fn test_eq_operator() { - // check use cases for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Eq, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0); - }, - true, - ); - - // check use cases for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Eq, u64::MAX).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::MAX); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, 0); - }, - true, - ); - } - - #[test] - fn test_ge_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Ge, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - libc::ioctl(0, (KVM_GET_PIT2 + 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - 
libc::ioctl(0, (KVM_GET_PIT2 - 1) as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Ge, u64::from(u32::MAX)).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX)); - libc::ioctl(0, 0, u64::from(u32::MAX) + 1); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, 1); - }, - true, - ); - } - - #[test] - fn test_gt_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Gt, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, (KVM_GET_PIT2 + 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Gt, u64::from(u32::MAX) + 10).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 11); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 10); - }, - true, - ); - } - - #[test] - fn test_le_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = 
vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Le, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - libc::ioctl(0, (KVM_GET_PIT2 - 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, (KVM_GET_PIT2 + 1) as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Le, u64::from(u32::MAX) + 10).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 10); - libc::ioctl(0, 0, u64::from(u32::MAX) + 9); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 11); - }, - true, - ); - } - - #[test] - fn test_lt_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Lt, KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, (KVM_GET_PIT2 - 1) as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, 
SeccompCmpArgLen::Qword, Lt, u64::from(u32::MAX) + 10).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 9); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX) + 10); - }, - true, - ); - } - - #[test] - fn test_masked_eq_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new( - 1, - SeccompCmpArgLen::Dword, - MaskedEq(KVM_GET_PIT2_MSB), - KVM_GET_PIT2, - ) - .unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - libc::ioctl(0, KVM_GET_PIT2_MSB as IoctlRequest); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2_LSB as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new( - 2, - SeccompCmpArgLen::Qword, - MaskedEq(u64::from(u32::MAX)), - u64::MAX, - ) - .unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, u64::from(u32::MAX)); - libc::ioctl(0, 0, u64::MAX); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, 0); - }, - true, - ); - } - - #[test] - fn test_ne_operator() { - // check use case for SeccompCmpArgLen::DWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(1, SeccompCmpArgLen::Dword, Ne, 
KVM_GET_PIT2).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, KVM_GET_PIT2 as IoctlRequest); - }, - true, - ); - - // check use case for SeccompCmpArgLen::QWORD - let rules = vec![allow_syscall_if( - libc::SYS_ioctl, - vec![SeccompRule::new( - vec![Cond::new(2, SeccompCmpArgLen::Qword, Ne, u64::MAX).unwrap()], - SeccompAction::Allow, - )], - )]; - // check syscalls that are supposed to work - validate_seccomp_filter( - rules.clone(), - || unsafe { - libc::ioctl(0, 0, 0); - }, - false, - ); - // check syscalls that are not supposed to work - validate_seccomp_filter( - rules, - || unsafe { - libc::ioctl(0, 0, u64::MAX); - }, - true, - ); - } - - // Checks that rule gets translated correctly into BPF statements. - #[test] - fn test_rule_bpf_output() { - Cond::new(6, ArgLen::Qword, Eq, 1).unwrap_err(); - - // Builds rule. - let rule = SeccompRule::new( - vec![ - Cond::new(0, ArgLen::Dword, Eq, 1).unwrap(), - Cond::new(2, ArgLen::Qword, MaskedEq(0b1010), 14).unwrap(), - ], - SeccompAction::Allow, - ); - - let (msb_offset, lsb_offset) = { (4, 0) }; - - // Builds hardcoded BPF instructions. - let instructions = vec![ - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 10), - BPF_STMT(0x20, 32 + msb_offset), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, 6), - BPF_STMT(0x20, 32 + lsb_offset), - BPF_STMT(0x54, 0b1010), - BPF_JUMP(0x15, 14 & 0b1010, 0, 3), - BPF_STMT(0x20, 16 + lsb_offset), - BPF_JUMP(0x15, 1, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - ]; - - // Compares translated rule with hardcoded BPF instructions. - let bpfprog: BpfProgram = rule.into(); - assert_eq!(bpfprog, instructions); - } - - // Checks that rule with too many conditions gets translated correctly into BPF statements - // using three helper jumps. 
- #[test] - fn test_rule_many_conditions_bpf_output() { - // Builds rule. - let mut conditions = Vec::with_capacity(43); - for _ in 0..42 { - conditions.push(Cond::new(0, ArgLen::Qword, MaskedEq(0), 0).unwrap()); - } - conditions.push(Cond::new(0, ArgLen::Qword, Eq, 0).unwrap()); - let rule = SeccompRule::new(conditions, SeccompAction::Allow); - - let (msb_offset, lsb_offset) = { (4, 0) }; - - // Builds hardcoded BPF instructions. - let mut instructions = vec![ - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 6), - BPF_STMT(0x20, 16 + msb_offset), - BPF_JUMP(0x15, 0, 0, 3), - BPF_STMT(0x20, 16 + lsb_offset), - BPF_JUMP(0x15, 0, 0, 1), - BPF_STMT(0x05, 2), - BPF_STMT(0x05, 254), - BPF_STMT(0x05, 254), - ]; - let mut offset = 253; - for _ in 0..42 { - offset -= 6; - instructions.append(&mut vec![ - BPF_STMT(0x20, 16 + msb_offset), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, offset + 3), - BPF_STMT(0x20, 16 + lsb_offset), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, offset), - ]); - } - instructions.push(BPF_STMT(0x06, 0x7fff_0000)); - - // Compares translated rule with hardcoded BPF instructions. 
- let bpfprog: BpfProgram = rule.into(); - assert_eq!(bpfprog, instructions); - } - - fn create_test_bpf_filter(arg_len: ArgLen) -> SeccompFilter { - SeccompFilter::new( - vec![ - allow_syscall_if( - 1, - vec![ - SeccompRule::new( - vec![ - Cond::new(2, arg_len.clone(), Le, 14).unwrap(), - Cond::new(2, arg_len.clone(), Ne, 10).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![ - Cond::new(2, arg_len.clone(), Gt, 20).unwrap(), - Cond::new(2, arg_len.clone(), Lt, 30).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![Cond::new(2, arg_len.clone(), Ge, 42).unwrap()], - SeccompAction::Allow, - ), - ], - ), - allow_syscall_if( - 9, - vec![SeccompRule::new( - vec![Cond::new(1, arg_len, MaskedEq(0b100), 36).unwrap()], - SeccompAction::Allow, - )], - ), - ] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap() - } - - #[test] - fn test_filter_bpf_output_dword() { - // Compares translated filter with hardcoded BPF program. - { - let mut empty_rule_map = BTreeMap::new(); - empty_rule_map.insert(1, vec![]); - SeccompFilter::new(empty_rule_map, SeccompAction::Allow, ARCH).unwrap_err(); - } - - let filter = create_test_bpf_filter(ArgLen::Dword); - - let mut instructions = Vec::new(); - instructions.extend(VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap())); - instructions.extend(vec![ - BPF_STMT(0x20, 0), - BPF_JUMP(0x15, 1, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 6), - BPF_STMT(0x20, 32), - BPF_JUMP(0x15, 10, 3, 0), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 14, 1, 0), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 6), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 30, 3, 0), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 20, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 4), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 42, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_JUMP(0x15, 9, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 5), - BPF_STMT(0x20, 24), - 
BPF_STMT(0x54, 0b100), - BPF_JUMP(0x15, 36 & 0b100, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_STMT(0x06, 0x0003_0000), - ]); - - let bpfprog: BpfProgram = filter.try_into().unwrap(); - assert_eq!(bpfprog, instructions); - } - - #[test] - fn test_filter_bpf_output_qword() { - // Compares translated filter with hardcoded BPF program. - { - let mut empty_rule_map = BTreeMap::new(); - empty_rule_map.insert(1, vec![]); - SeccompFilter::new(empty_rule_map, SeccompAction::Allow, ARCH).unwrap_err(); - } - - let filter = create_test_bpf_filter(ArgLen::Qword); - - let mut instructions = Vec::new(); - instructions.extend(VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap())); - instructions.extend(vec![ - BPF_STMT(0x20, 0), - BPF_JUMP(0x15, 1, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 11), - BPF_STMT(0x20, 36), - BPF_JUMP(0x15, 0, 0, 2), - BPF_STMT(0x20, 32), - BPF_JUMP(0x15, 10, 6, 0), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 4, 0), - BPF_JUMP(0x15, 0, 0, 2), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 14, 1, 0), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 12), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 9, 0), - BPF_JUMP(0x15, 0, 0, 2), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 30, 6, 0), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 3, 0), - BPF_JUMP(0x15, 0, 0, 3), - BPF_STMT(0x20, 32), - BPF_JUMP(0x25, 20, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 7), - BPF_STMT(0x20, 36), - BPF_JUMP(0x25, 0, 3, 0), - BPF_JUMP(0x15, 0, 0, 3), - BPF_STMT(0x20, 32), - BPF_JUMP(0x35, 42, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_JUMP(0x15, 9, 0, 1), - BPF_STMT(0x05, 1), - BPF_STMT(0x05, 8), - BPF_STMT(0x20, 28), - BPF_STMT(0x54, 0), - BPF_JUMP(0x15, 0, 0, 4), - BPF_STMT(0x20, 24), - BPF_STMT(0x54, 0b100), - BPF_JUMP(0x15, 36 & 0b100, 0, 1), - BPF_STMT(0x06, 0x7fff_0000), - BPF_STMT(0x06, 0x0003_0000), - BPF_STMT(0x06, 0x0003_0000), - ]); - - let bpfprog: BpfProgram = 
filter.try_into().unwrap(); - assert_eq!(bpfprog, instructions); - } - - #[test] - fn test_bpf_expanding_functions() { - // Compares the output of the BPF instruction generating functions to hardcoded - // instructions. - assert_eq!( - BPF_STMT(BPF_LD + BPF_W + BPF_ABS, 16), - sock_filter { - code: 0x20, - jt: 0, - jf: 0, - k: 16, - } - ); - assert_eq!( - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 10, 2, 5), - sock_filter { - code: 0x15, - jt: 2, - jf: 5, - k: 10, - } - ); - } - - #[test] - fn test_bpf_functions() { - { - let ret = VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap()); - let instructions = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - sock_filter { - code: 21, - jt: 1, - jf: 0, - #[cfg(target_arch = "x86_64")] - k: AUDIT_ARCH_X86_64, - #[cfg(target_arch = "aarch64")] - k: AUDIT_ARCH_AARCH64, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: SECCOMP_RET_KILL_PROCESS, - }, - ]; - assert_eq!(ret, instructions); - } - - { - let ret = EXAMINE_SYSCALL(); - let instructions = vec![sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }]; - assert_eq!(ret, instructions); - } - } - - #[test] - fn test_empty_filter() { - // An empty filter should always return the default action. - // For example, for an empty allowlist, it should always trap/kill, - // for an empty denylist, it should allow allow all system calls. - - let mut expected_program = Vec::new(); - expected_program.extend(VALIDATE_ARCHITECTURE(ARCH.try_into().unwrap())); - expected_program.extend(vec![BPF_STMT(0x06, 0x7fff_0000)]); - - let empty_rule_map = BTreeMap::new(); - let filter = SeccompFilter::new(empty_rule_map, SeccompAction::Allow, ARCH).unwrap(); - let prog: BpfProgram = filter.try_into().unwrap(); - - assert_eq!(expected_program, prog); - - // This should allow any system calls. - let pid = thread::spawn(move || { - // Install the filter. 
- install_filter(prog); - - unsafe { libc::getpid() } - }) - .join() - .unwrap(); - - // Check that the getpid syscall returned successfully. - assert!(pid > 0); - } - - #[test] - fn test_error_messages() { - assert_eq!( - format!("{}", FilterError::EmptyRulesVector), - "The seccomp rules vector is empty." - ); - assert_eq!( - format!("{}", FilterError::FilterTooLarge), - "The seccomp filter contains too many BPF instructions." - ); - assert_eq!( - format!("{}", FilterError::InvalidArgumentNumber), - "The seccomp rule contains an invalid argument number." - ); - assert_eq!( - format!( - "{}", - FilterError::Arch(TargetArchError::InvalidString("lala".to_string())) - ), - format!("{0}", TargetArchError::InvalidString("lala".to_string())) - ); - } - - #[test] - fn test_from_seccomp_action() { - assert_eq!(0x7fff_0000, u32::from(SeccompAction::Allow)); - assert_eq!(0x0005_002a, u32::from(SeccompAction::Errno(42))); - assert_eq!(0x0000_0000, u32::from(SeccompAction::KillThread)); - assert_eq!(0x8000_0000, u32::from(SeccompAction::KillProcess)); - assert_eq!(0x7ffc_0000, u32::from(SeccompAction::Log)); - assert_eq!(0x7ff0_002a, u32::from(SeccompAction::Trace(42))); - assert_eq!(0x0003_0000, u32::from(SeccompAction::Trap)); - } - - #[test] - fn test_validate_condition() { - // Invalid argument number - assert_eq!( - Cond::new(90, ArgLen::Dword, Eq, 65), - Err(FilterError::InvalidArgumentNumber) - ); - - // Valid argument number - Cond::new(0, ArgLen::Dword, Eq, 65).unwrap(); - } - - #[test] - fn test_seccomp_filter_validate() { - // Failure cases. - { - // Syscall has no rules. - assert_eq!( - SeccompFilter::new( - vec![(1, vec![]),].into_iter().collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap_err(), - FilterError::EmptyRulesVector - ); - // Syscall has multiple empty rules. 
- assert_eq!( - SeccompFilter::new( - vec![( - 1, - vec![ - SeccompRule::new(vec![], SeccompAction::Allow), - SeccompRule::new(vec![], SeccompAction::Allow) - ] - ),] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap_err(), - FilterError::ConflictingRules(1) - ); - - // Syscall has both empty rules condition-based rules. - assert_eq!( - SeccompFilter::new( - vec![( - 1, - vec![ - SeccompRule::new(vec![], SeccompAction::Allow), - SeccompRule::new( - vec![ - Cond::new(2, ArgLen::Dword, Le, 14).unwrap(), - Cond::new(1, ArgLen::Dword, Ne, 10).unwrap(), - ], - SeccompAction::Allow, - ), - ] - ),] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap_err(), - FilterError::ConflictingRules(1) - ); - } - } -} diff --git a/src/seccompiler/src/bin.rs b/src/seccompiler/src/bin.rs new file mode 100644 index 00000000000..8fb9d0fd511 --- /dev/null +++ b/src/seccompiler/src/bin.rs @@ -0,0 +1,40 @@ +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use clap::Parser; +use seccompiler::{compile_bpf, CompilationError}; + +const DEFAULT_OUTPUT_FILENAME: &str = "seccomp_binary_filter.out"; + +#[derive(Debug, Parser)] +#[command(version = format!("v{}", env!("CARGO_PKG_VERSION")))] +struct Cli { + #[arg( + short, + long, + help = "The computer architecture where the BPF program runs. Supported architectures: \ + x86_64, aarch64." + )] + target_arch: String, + #[arg(short, long, help = "File path of the JSON input.")] + input_file: String, + #[arg(short, long, help = "Optional path of the output file.", default_value = DEFAULT_OUTPUT_FILENAME)] + output_file: String, + #[arg( + short, + long, + help = "Deprecated! Transforms the filters into basic filters. Drops all argument checks \ + and rule-level actions. Not recommended." 
+ )] + basic: bool, +} + +fn main() -> Result<(), CompilationError> { + let cli = Cli::parse(); + compile_bpf( + &cli.input_file, + &cli.target_arch, + &cli.output_file, + cli.basic, + ) +} diff --git a/src/seccompiler/src/bindings.rs b/src/seccompiler/src/bindings.rs new file mode 100644 index 00000000000..969ea91cd1c --- /dev/null +++ b/src/seccompiler/src/bindings.rs @@ -0,0 +1,171 @@ +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2021 Sony Group Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +//! Raw FFI bindings for libseccomp library + +use std::os::raw::*; + +pub const MINUS_EEXIST: i32 = -libc::EEXIST; + +/// Filter context/handle (`*mut`) +pub type scmp_filter_ctx = *mut c_void; +/// Filter context/handle (`*const`) +pub type const_scmp_filter_ctx = *const c_void; + +/// Comparison operators +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(C)] +pub enum scmp_compare { + _SCMP_CMP_MIN = 0, + /// not equal + SCMP_CMP_NE = 1, + /// less than + SCMP_CMP_LT = 2, + /// less than or equal + SCMP_CMP_LE = 3, + /// equal + SCMP_CMP_EQ = 4, + /// greater than or equal + SCMP_CMP_GE = 5, + /// greater than + SCMP_CMP_GT = 6, + /// masked equality + SCMP_CMP_MASKED_EQ = 7, + _SCMP_CMP_MAX, +} + +/// Argument datum +pub type scmp_datum_t = u64; + +/// Argument / Value comparison definition +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(C)] +pub struct scmp_arg_cmp { + /// argument number, starting at 0 + pub arg: c_uint, + /// the comparison op, e.g. 
`SCMP_CMP_*` + pub op: scmp_compare, + pub datum_a: scmp_datum_t, + pub datum_b: scmp_datum_t, +} + +pub const SCMP_ARCH_X86_64: u32 = 0xc000003e; +pub const SCMP_ARCH_AARCH64: u32 = 0xc00000b7; +/// Kill the process +pub const SCMP_ACT_KILL_PROCESS: u32 = 0x80000000; +/// Kill the thread +pub const SCMP_ACT_KILL_THREAD: u32 = 0x00000000; +/// Throw a `SIGSYS` signal +pub const SCMP_ACT_TRAP: u32 = 0x00030000; +/// Base value of the errno action; see `SCMP_ACT_ERRNO` +pub const SCMP_ACT_ERRNO_MASK: u32 = 0x00050000; +/// Return the specified error code +#[must_use] +pub const fn SCMP_ACT_ERRNO(x: u16) -> u32 { + SCMP_ACT_ERRNO_MASK | x as u32 +} +pub const SCMP_ACT_TRACE_MASK: u32 = 0x7ff00000; +/// Notify a tracing process with the specified value +#[must_use] +pub const fn SCMP_ACT_TRACE(x: u16) -> u32 { + SCMP_ACT_TRACE_MASK | x as u32 +} +/// Allow the syscall to be executed after the action has been logged +pub const SCMP_ACT_LOG: u32 = 0x7ffc0000; +/// Allow the syscall to be executed +pub const SCMP_ACT_ALLOW: u32 = 0x7fff0000; + +#[link(name = "seccomp")] +unsafe extern "C" { + /// Initialize the filter state + /// + /// - `def_action`: the default filter action + /// + /// This function initializes the internal seccomp filter state and should + /// be called before any other functions in this library to ensure the filter + /// state is initialized. Returns a filter context on success, `ptr::null()` on failure. + pub safe fn seccomp_init(def_action: u32) -> scmp_filter_ctx; + + /// Adds an architecture to the filter + /// + /// - `ctx`: the filter context + /// - `arch_token`: the architecture token, e.g. `SCMP_ARCH_*` + /// + /// This function adds a new architecture to the given seccomp filter context. + /// Any new rules added after this function successfully returns will be added + /// to this architecture but existing rules will not be added to this + /// architecture. If the architecture token is `SCMP_ARCH_NATIVE` then the native + /// architecture will be assumed.
Returns zero on success, `-libc::EEXIST` if + /// specified architecture is already present, other negative values on failure. + pub fn seccomp_arch_add(ctx: scmp_filter_ctx, arch_token: u32) -> c_int; + + /// Resolve a syscall name to a number + /// + /// - `name`: the syscall name + /// + /// Resolve the given syscall name to the syscall number. Returns the syscall + /// number on success, including negative pseudo syscall numbers (e.g. `__PNR_*`); + /// returns [`__NR_SCMP_ERROR`] on failure. + pub fn seccomp_syscall_resolve_name(name: *const c_char) -> c_int; + + /// Add a new rule to the filter + /// + /// - `ctx`: the filter context + /// - `action`: the filter action + /// - `syscall`: the syscall number + /// - `arg_cnt`: the number of argument filters in the argument filter chain + /// - `...`: [`scmp_arg_cmp`] structs + /// + /// This function adds a series of new argument/value checks to the seccomp + /// filter for the given syscall; multiple argument/value checks can be + /// specified and they will be chained together (AND'd together) in the filter. + /// If the specified rule needs to be adjusted due to architecture specifics it + /// will be adjusted without notification. Returns zero on success, negative + /// values on failure. + pub fn seccomp_rule_add( + ctx: scmp_filter_ctx, + action: u32, + syscall: c_int, + arg_cnt: c_uint, + ... + ) -> c_int; + + /// Add a new rule to the filter + /// + /// - `ctx`: the filter context + /// - `action`: the filter action + /// - `syscall`: the syscall number + /// - `arg_cnt`: the number of elements in the arg_array parameter + /// - `arg_array`: array of [`scmp_arg_cmp`] structs + /// + /// This function adds a series of new argument/value checks to the seccomp + /// filter for the given syscall; multiple argument/value checks can be + /// specified and they will be chained together (AND'd together) in the filter. 
+ /// If the specified rule needs to be adjusted due to architecture specifics it + /// will be adjusted without notification. Returns zero on success, negative + /// values on failure. + pub fn seccomp_rule_add_array( + ctx: scmp_filter_ctx, + action: u32, + syscall: c_int, + arg_cnt: c_uint, + arg_array: *const scmp_arg_cmp, + ) -> c_int; + + /// Generate seccomp Berkeley Packet Filter (BPF) code and export it to a file + /// + /// - `ctx`: the filter context + /// - `fd`: the destination fd + /// + /// This function generates seccomp Berkeley Packet Filter (BPF) code and writes + /// it to the given fd. Returns zero on success, negative values on failure. + pub fn seccomp_export_bpf(ctx: const_scmp_filter_ctx, fd: c_int) -> c_int; +} + +/// Negative pseudo syscall number returned by some functions in case of an error +pub const __NR_SCMP_ERROR: c_int = -1; diff --git a/src/seccompiler/src/common.rs b/src/seccompiler/src/common.rs deleted file mode 100644 index 80ff96ad9f3..00000000000 --- a/src/seccompiler/src/common.rs +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Module that defines common data structures used by both the library crate -//! and seccompiler-bin. - -use serde::{Deserialize, Serialize}; - -/// The maximum seccomp-BPF program length allowed by the linux kernel. -pub const BPF_MAX_LEN: u16 = 4096; - -/// BPF instruction structure definition. -/// See /usr/include/linux/filter.h . -#[repr(C)] -#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] -#[doc(hidden)] -pub struct sock_filter { - pub code: ::std::os::raw::c_ushort, - pub jt: ::std::os::raw::c_uchar, - pub jf: ::std::os::raw::c_uchar, - pub k: ::std::os::raw::c_uint, -} - -/// Program made up of a sequence of BPF instructions.
-pub type BpfProgram = Vec; diff --git a/src/seccompiler/src/compiler.rs b/src/seccompiler/src/compiler.rs deleted file mode 100644 index 9194bc7e5bd..00000000000 --- a/src/seccompiler/src/compiler.rs +++ /dev/null @@ -1,540 +0,0 @@ -// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Module defining the logic for compiling the deserialized filter objects into the IR. -//! Used by seccompiler-bin. -//! -//! Via the `Compiler::compile_blob()` method, it also drives the entire JSON -> BLOB -//! transformation process. -//! -//! It also defines some of the objects that a JSON seccomp filter is deserialized into: -//! [`Filter`](struct.Filter.html), -//! [`SyscallRule`](struct.SyscallRule.html). -// -//! The rest of objects are deserialized directly into the IR (intermediate representation): -//! [`SeccompCondition`](../backend/struct.SeccompCondition.html), -//! [`SeccompAction`](../backend/enum.SeccompAction.html), -//! [`SeccompCmpOp`](../backend/enum.SeccompCmpOp.html), -//! [`SeccompCmpArgLen`](../backend/enum.SeccompCmpArgLen.html). - -use std::collections::BTreeMap; -use std::convert::{Into, TryInto}; -use std::{fmt, result}; - -use serde::de::{self, Error as _, MapAccess, Visitor}; -use serde::Deserialize; - -use crate::backend::{ - Comment, FilterError, SeccompAction, SeccompCondition, SeccompFilter, SeccompRule, - SeccompRuleMap, TargetArch, -}; -use crate::common::BpfProgram; -use crate::syscall_table::SyscallTable; - -/// Errors compiling Filters into BPF. -#[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] -pub enum CompilationError { - /// `filter_action` and `default_action` are equal. - IdenticalActions, - /// {0} - Filter(#[from] FilterError), - /// Invalid syscall name: {0} for given arch: {1:?}. - SyscallName(String, TargetArch), -} - -/// Deserializable object that represents the Json filter file. 
-#[derive(Debug)] -pub struct JsonFile(pub BTreeMap); - -// Implement a custom deserializer, that returns an error for duplicate thread keys. -impl<'de> Deserialize<'de> for JsonFile { - fn deserialize(deserializer: D) -> result::Result - where - D: de::Deserializer<'de>, - { - #[derive(Debug)] - struct JsonFileVisitor; - - impl<'d> Visitor<'d> for JsonFileVisitor { - type Value = BTreeMap; - - fn expecting(&self, f: &mut fmt::Formatter<'_>) -> result::Result<(), fmt::Error> { - f.write_str("a map of filters") - } - - fn visit_map(self, mut access: M) -> result::Result - where - M: MapAccess<'d>, - { - let mut values = Self::Value::new(); - - while let Some((key, value)) = access.next_entry()? { - if values.insert(key, value).is_some() { - return Err(M::Error::custom("duplicate filter key")); - }; - } - - Ok(values) - } - } - Ok(JsonFile(deserializer.deserialize_map(JsonFileVisitor)?)) - } -} - -/// Deserializable object representing a syscall rule. -#[derive(Debug, Deserialize, PartialEq, Clone)] -#[serde(deny_unknown_fields)] -pub struct SyscallRule { - /// Name of the syscall. - syscall: String, - /// Rule conditions. - #[serde(rename = "args")] - conditions: Option>, - /// Optional empty value, represents a `comment` property in the JSON file. - comment: Option, -} - -impl SyscallRule { - /// Perform semantic checks after deserialization. - fn validate(&self) -> Result<(), CompilationError> { - // Validate all `SeccompCondition`s. - if let Some(conditions) = self.conditions.as_ref() { - return conditions - .iter() - .filter_map(|cond| cond.validate().err()) - .next() - .map_or(Ok(()), |err| Err(CompilationError::Filter(err))); - } - - Ok(()) - } -} - -/// Deserializable seccomp filter. Refers to one thread category. -#[derive(Deserialize, PartialEq, Debug, Clone)] -#[serde(deny_unknown_fields)] -pub struct Filter { - /// Default action if no rules match. e.g. `Kill` for an AllowList. - default_action: SeccompAction, - /// Default action if a rule matches. e.g. 
`Allow` for an AllowList. - filter_action: SeccompAction, - /// The collection of `SyscallRule`s. - filter: Vec, -} - -impl Filter { - /// Perform semantic checks after deserialization. - fn validate(&self) -> Result<(), CompilationError> { - // Doesn't make sense to have equal default and on-match actions. - if self.default_action == self.filter_action { - return Err(CompilationError::IdenticalActions); - } - - // Validate all `SyscallRule`s. - self.filter - .iter() - .filter_map(|syscall_rule| syscall_rule.validate().err()) - .next() - .map_or(Ok(()), Err) - } -} - -/// Object responsible for compiling [`Filter`](struct.Filter.html)s into -/// [`BpfProgram`](../common/type.BpfProgram.html)s. -/// Uses the [`SeccompFilter`](../backend/struct.SeccompFilter.html) interface as an IR language. -#[derive(Debug)] -pub struct Compiler { - /// Target architecture. Can be different from the current `target_arch`. - arch: TargetArch, - /// Target-specific syscall table. - syscall_table: SyscallTable, -} - -impl Compiler { - /// Create a new `Compiler` instance, for the given target architecture. - pub fn new(arch: TargetArch) -> Self { - Self { - arch, - syscall_table: SyscallTable::new(arch), - } - } - - /// Perform semantic checks after deserialization. - fn validate_filters(&self, filters: &BTreeMap) -> Result<(), CompilationError> { - // Validate all `Filter`s. - filters - .iter() - .filter_map(|(_, filter)| filter.validate().err()) - .next() - .map_or(Ok(()), Err) - } - - /// Main compilation function. 
- pub fn compile_blob( - &self, - filters: BTreeMap, - is_basic: bool, - ) -> Result, CompilationError> { - self.validate_filters(&filters)?; - let mut bpf_map: BTreeMap = BTreeMap::new(); - - for (thread_name, filter) in filters.into_iter() { - if is_basic { - bpf_map.insert( - thread_name, - self.make_basic_seccomp_filter(filter)?.try_into()?, - ); - } else { - bpf_map.insert(thread_name, self.make_seccomp_filter(filter)?.try_into()?); - } - } - Ok(bpf_map) - } - - /// Transforms the deserialized `Filter` into a `SeccompFilter` (IR language). - fn make_seccomp_filter(&self, filter: Filter) -> Result { - let mut rule_map: SeccompRuleMap = SeccompRuleMap::new(); - let filter_action = &filter.filter_action; - - for syscall_rule in filter.filter { - let syscall_name = syscall_rule.syscall; - let action = filter_action.clone(); - let syscall_nr = self - .syscall_table - .get_syscall_nr(&syscall_name) - .ok_or_else(|| CompilationError::SyscallName(syscall_name.clone(), self.arch))?; - let rule_accumulator = rule_map.entry(syscall_nr).or_default(); - - match syscall_rule.conditions { - Some(conditions) => rule_accumulator.push(SeccompRule::new(conditions, action)), - None => rule_accumulator.push(SeccompRule::new(vec![], action)), - }; - } - - SeccompFilter::new(rule_map, filter.default_action, self.arch.into()) - .map_err(CompilationError::Filter) - } - - /// Transforms the deserialized `Filter` into a basic `SeccompFilter` (IR language). - /// This filter will drop any argument checks and any rule-level action. - /// All rules will trigger the filter-level `filter_action`. - fn make_basic_seccomp_filter(&self, filter: Filter) -> Result { - let mut rule_map: SeccompRuleMap = SeccompRuleMap::new(); - let filter_action = &filter.filter_action; - - for syscall_rule in filter.filter { - let syscall_name = syscall_rule.syscall; - // Basic filters bypass the rule-level action and use the filter_action. 
- let action = filter_action.clone(); - let syscall_nr = self - .syscall_table - .get_syscall_nr(&syscall_name) - .ok_or_else(|| CompilationError::SyscallName(syscall_name.clone(), self.arch))?; - - // If there is already an entry for this syscall, do nothing. - // Otherwise, insert an empty rule that triggers the filter_action. - rule_map - .entry(syscall_nr) - .or_insert_with(|| vec![SeccompRule::new(vec![], action)]); - } - - SeccompFilter::new(rule_map, filter.default_action, self.arch.into()) - .map_err(CompilationError::Filter) - } -} - -#[cfg(test)] -mod tests { - use std::collections::BTreeMap; - use std::convert::TryInto; - use std::env::consts::ARCH; - - use super::{CompilationError, Compiler, Filter, SyscallRule}; - use crate::backend::SeccompCmpArgLen::*; - use crate::backend::SeccompCmpOp::*; - use crate::backend::{ - FilterError, SeccompAction, SeccompCondition as Cond, SeccompFilter, SeccompRule, - TargetArch, - }; - - impl Filter { - fn new( - default_action: SeccompAction, - filter_action: SeccompAction, - filter: Vec, - ) -> Filter { - Filter { - default_action, - filter_action, - filter, - } - } - } - - impl SyscallRule { - fn new(syscall: String, conditions: Option>) -> SyscallRule { - SyscallRule { - syscall, - conditions, - comment: None, - } - } - } - - fn match_syscall(syscall_number: i64, action: SeccompAction) -> (i64, Vec) { - (syscall_number, vec![SeccompRule::new(vec![], action)]) - } - - fn match_syscall_if(syscall_number: i64, rules: Vec) -> (i64, Vec) { - (syscall_number, rules) - } - - #[test] - // Test the transformation of Filter objects into SeccompFilter objects. - // We test this private method because we are interested in seeing that the - // Filter -> SeccompFilter transformation is done correctly. - fn test_make_seccomp_filter() { - let compiler = Compiler::new(ARCH.try_into().unwrap()); - // Test a well-formed filter. Malformed filters are tested in test_compile_blob(). 
- let filter = Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![ - SyscallRule::new("read".to_string(), None), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(2, Dword, Le, 65).unwrap(), - Cond::new(1, Qword, Ne, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(3, Qword, Gt, 65).unwrap(), - Cond::new(1, Qword, Lt, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![Cond::new(3, Qword, Ge, 65).unwrap()]), - ), - SyscallRule::new( - "ioctl".to_string(), - Some(vec![Cond::new(3, Dword, MaskedEq(100), 65).unwrap()]), - ), - ], - ); - - // The expected IR. - let seccomp_filter = SeccompFilter::new( - vec![ - match_syscall( - compiler.syscall_table.get_syscall_nr("read").unwrap(), - SeccompAction::Allow, - ), - match_syscall_if( - compiler.syscall_table.get_syscall_nr("futex").unwrap(), - vec![ - SeccompRule::new( - vec![ - Cond::new(2, Dword, Le, 65).unwrap(), - Cond::new(1, Qword, Ne, 80).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![ - Cond::new(3, Qword, Gt, 65).unwrap(), - Cond::new(1, Qword, Lt, 80).unwrap(), - ], - SeccompAction::Allow, - ), - SeccompRule::new( - vec![Cond::new(3, Qword, Ge, 65).unwrap()], - SeccompAction::Allow, - ), - ], - ), - match_syscall_if( - compiler.syscall_table.get_syscall_nr("ioctl").unwrap(), - vec![SeccompRule::new( - vec![Cond::new(3, Dword, MaskedEq(100), 65).unwrap()], - SeccompAction::Allow, - )], - ), - ] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap(); - - assert_eq!( - compiler.make_seccomp_filter(filter).unwrap(), - seccomp_filter - ); - } - - #[test] - // Test the transformation of Filter objects into SeccompFilter objects. - // This `basic` alternative version of the make_seccomp_filter method drops argument checks. - fn test_make_basic_seccomp_filter() { - let compiler = Compiler::new(ARCH.try_into().unwrap()); - // Test a well-formed filter. 
Malformed filters are tested in test_compile_blob(). - let filter = Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![ - SyscallRule::new("read".to_string(), None), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(2, Dword, Le, 65).unwrap(), - Cond::new(1, Qword, Ne, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(3, Qword, Gt, 65).unwrap(), - Cond::new(1, Qword, Lt, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![Cond::new(3, Qword, Ge, 65).unwrap()]), - ), - SyscallRule::new( - "ioctl".to_string(), - Some(vec![Cond::new(3, Dword, MaskedEq(100), 65).unwrap()]), - ), - ], - ); - - // The expected IR. - let seccomp_filter = SeccompFilter::new( - vec![ - match_syscall( - compiler.syscall_table.get_syscall_nr("read").unwrap(), - SeccompAction::Allow, - ), - match_syscall( - compiler.syscall_table.get_syscall_nr("futex").unwrap(), - SeccompAction::Allow, - ), - match_syscall( - compiler.syscall_table.get_syscall_nr("ioctl").unwrap(), - SeccompAction::Allow, - ), - ] - .into_iter() - .collect(), - SeccompAction::Trap, - ARCH, - ) - .unwrap(); - - assert_eq!( - compiler.make_basic_seccomp_filter(filter).unwrap(), - seccomp_filter - ); - } - - #[test] - fn test_compile_blob() { - let compiler = Compiler::new(ARCH.try_into().unwrap()); - // Test with malformed filters. 
- - let mut wrong_syscall_name_filters = BTreeMap::new(); - wrong_syscall_name_filters.insert( - "T1".to_string(), - Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![SyscallRule::new("wrong_syscall".to_string(), None)], - ), - ); - - assert_eq!( - compiler.compile_blob(wrong_syscall_name_filters, false), - Err(CompilationError::SyscallName( - "wrong_syscall".to_string(), - compiler.arch - )) - ); - - let mut identical_action_filters = BTreeMap::new(); - identical_action_filters.insert( - "T1".to_string(), - Filter::new(SeccompAction::Allow, SeccompAction::Allow, vec![]), - ); - - assert_eq!( - compiler.compile_blob(identical_action_filters, false), - Err(CompilationError::IdenticalActions) - ); - - // Test with correct filters. - let mut correct_filters = BTreeMap::new(); - correct_filters.insert( - "Thread1".to_string(), - Filter::new( - SeccompAction::Trap, - SeccompAction::Allow, - vec![ - SyscallRule::new("read".to_string(), None), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(1, Dword, Eq, 65).unwrap(), - Cond::new(2, Qword, Le, 80).unwrap(), - ]), - ), - SyscallRule::new( - "futex".to_string(), - Some(vec![ - Cond::new(3, Dword, Eq, 65).unwrap(), - Cond::new(2, Qword, Le, 80).unwrap(), - ]), - ), - ], - ), - ); - - // We don't test the BPF compilation in this module. - // This is done in the seccomp/lib.rs module. - // Here, we only test the (Filter -> SeccompFilter) transformations. (High-level -> IR) - compiler - .compile_blob(correct_filters.clone(), false) - .unwrap(); - // Also test with basic filtering on. - compiler.compile_blob(correct_filters, true).unwrap(); - } - - #[test] - fn test_error_messages() { - assert_eq!( - format!("{}", CompilationError::IdenticalActions), - "`filter_action` and `default_action` are equal." - ); - assert_eq!( - format!( - "{}", - CompilationError::Filter(FilterError::InvalidArgumentNumber) - ), - "The seccomp rule contains an invalid argument number." 
- ); - assert_eq!( - format!( - "{}", - CompilationError::SyscallName("asdsad".to_string(), TargetArch::x86_64) - ), - format!( - "Invalid syscall name: {} for given arch: {}.", - "asdsad", "x86_64" - ) - ); - } -} diff --git a/src/seccompiler/src/lib.rs b/src/seccompiler/src/lib.rs index cc3e4756996..3fd62106275 100644 --- a/src/seccompiler/src/lib.rs +++ b/src/seccompiler/src/lib.rs @@ -1,270 +1,181 @@ -// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -#![warn(missing_docs)] - -//! The library crate that defines common helper functions that are generally used in -//! conjunction with seccompiler-bin. - -pub mod backend; -pub mod common; -pub mod compiler; -/// Syscall tables -pub mod syscall_table; - use std::collections::HashMap; -use std::fmt::Debug; -use std::io::Read; -use std::sync::Arc; +use std::fs::File; +use std::io::{Read, Seek}; +use std::os::fd::{AsRawFd, FromRawFd}; +use std::os::unix::fs::MetadataExt; +use std::str::FromStr; -use bincode::{DefaultOptions, Error as BincodeError, Options}; -use common::BPF_MAX_LEN; -// Re-export the data types needed for calling the helper functions. -pub use common::{sock_filter, BpfProgram}; +use bincode::Error as BincodeError; -/// Type that associates a thread category to a BPF program. -pub type BpfThreadMap = HashMap>; +mod bindings; +use bindings::*; -// BPF structure definition for filter array. -// See /usr/include/linux/filter.h . -#[repr(C)] -struct sock_fprog { - pub len: ::std::os::raw::c_ushort, - pub filter: *const sock_filter, -} - -/// Reference to program made up of a sequence of BPF instructions. -pub type BpfProgramRef<'a> = &'a [sock_filter]; +pub mod types; +pub use types::*; +use zerocopy::IntoBytes; -/// Binary filter deserialization errors. +/// Binary filter compilation errors. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum DeserializationError { - /// Bincode deserialization failed: {0} - Bincode(BincodeError), +pub enum CompilationError { + /// Cannot open input file: {0} + IntputOpen(std::io::Error), + /// Cannot read input file: {0} + InputRead(std::io::Error), + /// Cannot deserialize json: {0} + JsonDeserialize(serde_json::Error), + /// Cannot parse arch: {0} + ArchParse(String), + /// Cannot create libseccomp context + LibSeccompContext, + /// Cannot add libseccomp arch + LibSeccompArch, + /// Cannot add libseccomp syscall + LibSeccompSycall, + /// Cannot add libseccomp syscall rule + LibSeccompRule, + /// Cannot export libseccomp bpf + LibSeccompExport, + /// Cannot create memfd: {0} + MemfdCreate(std::io::Error), + /// Cannot rewind memfd: {0} + MemfdRewind(std::io::Error), + /// Cannot read from memfd: {0} + MemfdRead(std::io::Error), + /// Cannot create output file: {0} + OutputCreate(std::io::Error), + /// Cannot serialize bfp: {0} + BincodeSerialize(BincodeError), } -/// Filter installation errors. -#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] -pub enum InstallationError { - /// Filter length exceeds the maximum size of {BPF_MAX_LEN:} instructions - FilterTooLarge, - /// prctl` syscall failed with error code: {0} - Prctl(i32), -} - -/// Deserialize a BPF file into a collection of usable BPF filters. -/// Has an optional `bytes_limit` that is passed to bincode to constrain the maximum amount of -/// memory that we can allocate while performing the deserialization. -/// It's recommended that the integrator of the library uses this to prevent memory allocations -/// DOS-es. -pub fn deserialize_binary( - reader: R, - bytes_limit: Option, -) -> std::result::Result { - let result = match bytes_limit { - // Also add the default options. 
These are not part of the `DefaultOptions` as per - // this issue: https://github.com/servo/bincode/issues/333 - Some(limit) => DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes() - .with_limit(limit) - .deserialize_from::>(reader), - // No limit is the default. - None => bincode::deserialize_from::>(reader), - }; - - Ok(result - .map_err(DeserializationError::Bincode)? - .into_iter() - .map(|(k, v)| (k.to_lowercase(), Arc::new(v))) - .collect()) -} - -/// Helper function for installing a BPF filter. -pub fn apply_filter(bpf_filter: BpfProgramRef) -> std::result::Result<(), InstallationError> { - // If the program is empty, don't install the filter. - if bpf_filter.is_empty() { - return Ok(()); - } +pub fn compile_bpf( + input_path: &str, + arch: &str, + out_path: &str, + basic: bool, +) -> Result<(), CompilationError> { + let mut file_content = String::new(); + File::open(input_path) + .map_err(CompilationError::IntputOpen)? + .read_to_string(&mut file_content) + .map_err(CompilationError::InputRead)?; + let bpf_map_json: BpfJson = + serde_json::from_str(&file_content).map_err(CompilationError::JsonDeserialize)?; + + let arch = TargetArch::from_str(arch).map_err(CompilationError::ArchParse)?; - // If the program length is greater than the limit allowed by the kernel, - // fail quickly. Otherwise, `prctl` will give a more cryptic error code. - let bpf_filter_len = - u16::try_from(bpf_filter.len()).map_err(|_| InstallationError::FilterTooLarge)?; - if bpf_filter_len > BPF_MAX_LEN { - return Err(InstallationError::FilterTooLarge); + // SAFETY: Safe because the parameters are valid. + let memfd_fd = unsafe { libc::memfd_create(c"bpf".as_ptr().cast(), 0) }; + if memfd_fd < 0 { + return Err(CompilationError::MemfdCreate( + std::io::Error::last_os_error(), + )); } // SAFETY: Safe because the parameters are valid. 
- unsafe { - { - let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - if rc != 0 { - return Err(InstallationError::Prctl(*libc::__errno_location())); + let mut memfd = unsafe { File::from_raw_fd(memfd_fd) }; + + let mut bpf_map: HashMap> = HashMap::new(); + for (name, filter) in bpf_map_json.0.iter() { + let default_action = filter.default_action.to_scmp_type(); + let filter_action = filter.filter_action.to_scmp_type(); + + // SAFETY: Safe as all args are correct. + let bpf_filter = { + let r = seccomp_init(default_action); + if r.is_null() { + return Err(CompilationError::LibSeccompContext); } - } - - let bpf_prog = sock_fprog { - len: bpf_filter_len, - filter: bpf_filter.as_ptr(), + r }; - let bpf_prog_ptr = &bpf_prog as *const sock_fprog; - { - let rc = libc::prctl( - libc::PR_SET_SECCOMP, - libc::SECCOMP_MODE_FILTER, - bpf_prog_ptr, - ); - if rc != 0 { - return Err(InstallationError::Prctl(*libc::__errno_location())); - } - } - } - - Ok(()) -} - -#[cfg(test)] -mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] - - use std::collections::HashMap; - use std::sync::Arc; - use std::thread; - use super::*; - use crate::common::BpfProgram; - - #[test] - fn test_deserialize_binary() { - // Malformed bincode binary. - { - let data = "adassafvc".to_string(); - deserialize_binary(data.as_bytes(), None).unwrap_err(); + // SAFETY: Safe as all args are correct. + unsafe { + let r = seccomp_arch_add(bpf_filter, arch.to_scmp_type()); + if r != 0 && r != MINUS_EEXIST { + return Err(CompilationError::LibSeccompArch); + } } - // Test that the binary deserialization is correct, and that the thread keys - // have been lowercased. 
- { - let bpf_prog = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - ]; - let mut filter_map: HashMap = HashMap::new(); - filter_map.insert("VcpU".to_string(), bpf_prog.clone()); - let bytes = bincode::serialize(&filter_map).unwrap(); - - let mut expected_res = BpfThreadMap::new(); - expected_res.insert("vcpu".to_string(), Arc::new(bpf_prog)); - assert_eq!(deserialize_binary(&bytes[..], None).unwrap(), expected_res); + for rule in filter.filter.iter() { + // SAFETY: Safe as all args are correct. + let syscall = unsafe { + let r = seccomp_syscall_resolve_name(rule.syscall.as_ptr()); + if r == __NR_SCMP_ERROR { + return Err(CompilationError::LibSeccompSycall); + } + r + }; + + // TODO remove when we drop deprecated "basic" arg from cli. + // "basic" bpf means it ignores condition checks. + if basic { + // SAFETY: Safe as all args are correct. + unsafe { + if seccomp_rule_add(bpf_filter, filter_action, syscall, 0) != 0 { + return Err(CompilationError::LibSeccompRule); + } + } + } else if let Some(rules) = &rule.args { + let comparators = rules + .iter() + .map(|rule| rule.to_scmp_type()) + .collect::>(); + + // SAFETY: Safe as all args are correct. + // We can assume no one will define u32::MAX + // filters for a syscall. + #[allow(clippy::cast_possible_truncation)] + unsafe { + if seccomp_rule_add_array( + bpf_filter, + filter_action, + syscall, + comparators.len() as u32, + comparators.as_ptr(), + ) != 0 + { + return Err(CompilationError::LibSeccompRule); + } + } + } else { + // SAFETY: Safe as all args are correct. + unsafe { + if seccomp_rule_add(bpf_filter, filter_action, syscall, 0) != 0 { + return Err(CompilationError::LibSeccompRule); + } + } + } } - // Test deserialization with binary_limit. 
- { - let bpf_prog = vec![sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }]; - - let mut filter_map: HashMap = HashMap::new(); - filter_map.insert("t1".to_string(), bpf_prog.clone()); - - let bytes = bincode::serialize(&filter_map).unwrap(); - - // Binary limit too low. - assert!(matches!( - deserialize_binary(&bytes[..], Some(20)).unwrap_err(), - DeserializationError::Bincode(error) - if error.to_string() == "the size limit has been reached" - )); - - let mut expected_res = BpfThreadMap::new(); - expected_res.insert("t1".to_string(), Arc::new(bpf_prog)); - - // Correct binary limit. - assert_eq!( - deserialize_binary(&bytes[..], Some(50)).unwrap(), - expected_res - ); + // SAFETY: Safe as all args are correect. + unsafe { + if seccomp_export_bpf(bpf_filter, memfd.as_raw_fd()) != 0 { + return Err(CompilationError::LibSeccompExport); + } } + memfd.rewind().map_err(CompilationError::MemfdRewind)?; + + // Cast is safe because usize == u64 + #[allow(clippy::cast_possible_truncation)] + let size = memfd.metadata().unwrap().size() as usize; + // Bpf instructions are 8 byte values and 4 byte alignment. + // We use u64 to satisfy these requirements. + let instructions = size / std::mem::size_of::(); + let mut bpf = vec![0_u64; instructions]; + + memfd + .read_exact(bpf.as_mut_bytes()) + .map_err(CompilationError::MemfdRead)?; + memfd.rewind().map_err(CompilationError::MemfdRewind)?; + + bpf_map.insert(name.clone(), bpf); } - #[test] - fn test_filter_apply() { - // Test filter too large. - thread::spawn(|| { - let filter: BpfProgram = vec![ - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 0, - }; - 5000 // Limit is 4096 - ]; - - // Apply seccomp filter. - assert_eq!( - apply_filter(&filter).unwrap_err(), - InstallationError::FilterTooLarge - ); - }) - .join() - .unwrap(); + let output_file = File::create(out_path).map_err(CompilationError::OutputCreate)?; - // Test empty filter. 
- thread::spawn(|| { - let filter: BpfProgram = vec![]; - - assert_eq!(filter.len(), 0); - - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - - apply_filter(&filter).unwrap(); - - // test that seccomp level remains 0 on failure. - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - }) - .join() - .unwrap(); - - // Test invalid BPF code. - thread::spawn(|| { - let filter = vec![sock_filter { - // invalid opcode - code: 9999, - jt: 0, - jf: 0, - k: 0, - }]; - - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - - assert_eq!( - apply_filter(&filter).unwrap_err(), - InstallationError::Prctl(22) - ); - - // test that seccomp level remains 0 on failure. - let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; - assert_eq!(seccomp_level, 0); - }) - .join() - .unwrap(); - } + bincode::serialize_into(output_file, &bpf_map).map_err(CompilationError::BincodeSerialize)?; + Ok(()) } diff --git a/src/seccompiler/src/seccompiler_bin.rs b/src/seccompiler/src/seccompiler_bin.rs deleted file mode 100644 index 890a2a3ecdb..00000000000 --- a/src/seccompiler/src/seccompiler_bin.rs +++ /dev/null @@ -1,578 +0,0 @@ -// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! seccompiler-bin is a program that compiles multi-threaded seccomp-bpf filters expressed as JSON -//! into raw BPF programs, serializing them and outputting them to a file. -//! -//! Used in conjunction with the provided library crate, one can deserialize the binary filters -//! and easily install them on a per-thread basis, in order to achieve a quick and robust -//! seccomp-based jailing solution. -//! -//! See the documentation on github for more information. -//! -//! ```text -//! The compilation goes through a couple of steps, from JSON to BPF: -//! -//! JSON -//! | -//! (via serde_json) -//! 
| -//! V -//! collection of `Filter` objects -//! | -//! (via Compiler.compile_blob(...)) -//! | -//! V -//! collection of `SeccompFilter` objects -//! (IR - intermediate representation) -//! | -//! (via SeccompFilter.try_into::(...)) -//! | -//! V -//! collection of `BpfProgram` objects -//! ``` - -use std::collections::BTreeMap; -use std::convert::TryInto; -use std::fs::File; -use std::io::BufReader; -use std::path::PathBuf; - -mod backend; -mod common; -mod compiler; -mod syscall_table; - -use backend::{TargetArch, TargetArchError}; -use bincode::Error as BincodeError; -use common::BpfProgram; -use compiler::{CompilationError, Compiler, JsonFile}; -use serde_json::error::Error as JSONError; -use utils::arg_parser::{ - ArgParser, Argument, Arguments as ArgumentsBag, UtilsArgParserError as ArgParserError, -}; - -const SECCOMPILER_VERSION: &str = env!("CARGO_PKG_VERSION"); -const DEFAULT_OUTPUT_FILENAME: &str = "seccomp_binary_filter.out"; - -#[derive(Debug, thiserror::Error)] -enum SeccompError { - #[error("Bincode (de)serialization failed: {0}")] - Bincode(BincodeError), - #[error("{0}")] - Compilation(CompilationError), - #[error("{}", format!("Failed to open file {:?}: {1}", .0, .1).replace('\"', ""))] - FileOpen(PathBuf, std::io::Error), - #[error("Error parsing JSON: {0}")] - Json(JSONError), - #[error("Missing input file.")] - MissingInputFile, - #[error("Missing target arch.")] - MissingTargetArch, - #[error("{0}")] - Arch(#[from] TargetArchError), -} - -#[derive(Debug, PartialEq)] -struct Arguments { - input_file: String, - output_file: String, - target_arch: TargetArch, - is_basic: bool, -} - -fn build_arg_parser() -> ArgParser<'static> { - ArgParser::new() - .arg( - Argument::new("input-file") - .required(true) - .takes_value(true) - .help("File path of the JSON input."), - ) - .arg( - Argument::new("output-file") - .required(false) - .takes_value(true) - .default_value(DEFAULT_OUTPUT_FILENAME) - .help("Optional path of the output file."), - ) - .arg( - 
Argument::new("target-arch") - .required(true) - .takes_value(true) - .help( - "The computer architecture where the BPF program runs. Supported \ - architectures: x86_64, aarch64.", - ), - ) - .arg(Argument::new("basic").takes_value(false).help( - "Deprecated! Transforms the filters into basic filters. Drops all argument checks and \ - rule-level actions. Not recommended.", - )) -} - -fn get_argument_values(arguments: &ArgumentsBag) -> Result { - let Some(arch_string) = arguments.single_value("target-arch") else { - return Err(SeccompError::MissingTargetArch); - }; - let target_arch: TargetArch = arch_string.as_str().try_into()?; - - let Some(input_file) = arguments.single_value("input-file") else { - return Err(SeccompError::MissingInputFile); - }; - - let is_basic = arguments.flag_present("basic"); - if is_basic { - println!( - "Warning! You are using a deprecated parameter: --basic, that will be removed in a \ - future version.\n" - ); - } - - Ok(Arguments { - target_arch, - input_file: input_file.to_owned(), - // Safe to unwrap because it has a default value - output_file: arguments.single_value("output-file").unwrap().to_owned(), - is_basic, - }) -} - -fn compile(args: &Arguments) -> Result<(), SeccompError> { - let input_file = File::open(&args.input_file) - .map_err(|err| SeccompError::FileOpen(PathBuf::from(&args.input_file), err))?; - let mut input_reader = BufReader::new(input_file); - let filters = - serde_json::from_reader::<_, JsonFile>(&mut input_reader).map_err(SeccompError::Json)?; - let compiler = Compiler::new(args.target_arch); - - // transform the IR into a Map of BPFPrograms - let bpf_data: BTreeMap = compiler - .compile_blob(filters.0, args.is_basic) - .map_err(SeccompError::Compilation)?; - - // serialize the BPF programs & output them to a file - let output_file = File::create(&args.output_file) - .map_err(|err| SeccompError::FileOpen(PathBuf::from(&args.output_file), err))?; - bincode::serialize_into(output_file, 
&bpf_data).map_err(SeccompError::Bincode)?; - - Ok(()) -} - -#[derive(Debug, thiserror::Error, displaydoc::Display)] -enum SeccompilerError { - /// Argument Parsing Error: {0} - ArgParsing(ArgParserError), - /// {0} \n\nFor more information try --help. - InvalidArgumentValue(SeccompError), - /// {0} - Error(SeccompError), -} - -fn main() -> core::result::Result<(), SeccompilerError> { - let result = main_exec(); - if let Err(e) = result { - eprintln!("{}", e); - Err(e) - } else { - Ok(()) - } -} - -fn main_exec() -> core::result::Result<(), SeccompilerError> { - let mut arg_parser = build_arg_parser(); - - arg_parser - .parse_from_cmdline() - .map_err(SeccompilerError::ArgParsing)?; - - if arg_parser.arguments().flag_present("help") { - println!("Seccompiler-bin v{}\n", SECCOMPILER_VERSION); - println!("{}", arg_parser.formatted_help()); - return Ok(()); - } - if arg_parser.arguments().flag_present("version") { - println!("Seccompiler-bin v{}\n", SECCOMPILER_VERSION); - return Ok(()); - } - - let args = get_argument_values(arg_parser.arguments()) - .map_err(SeccompilerError::InvalidArgumentValue)?; - - compile(&args).map_err(SeccompilerError::Error)?; - - println!("Filter successfully compiled into: {}", args.output_file); - Ok(()) -} - -#[cfg(test)] -mod tests { - #![allow(clippy::undocumented_unsafe_blocks)] - - use std::io; - use std::io::Write; - use std::path::PathBuf; - - use bincode::Error as BincodeError; - use vmm_sys_util::tempfile::TempFile; - - use super::compiler::CompilationError as FilterFormatError; - use super::{ - build_arg_parser, compile, get_argument_values, Arguments, SeccompError, - DEFAULT_OUTPUT_FILENAME, - }; - use crate::backend::{TargetArch, TargetArchError}; - - // Correct JSON input data - static CORRECT_JSON_INPUT: &str = r#" - { - "thread_1": { - "default_action": { - "errno": 12 - }, - "filter_action": "allow", - "filter": [ - { - "syscall": "open" - }, - { - "syscall": "close" - }, - { - "syscall": "stat" - }, - { - "syscall": 
"futex", - "args": [ - { - "index": 2, - "type": "dword", - "op": "le", - "val": 65 - }, - { - "index": 1, - "type": "qword", - "op": "ne", - "val": 80 - } - ] - }, - { - "syscall": "futex", - "args": [ - { - "index": 3, - "type": "qword", - "op": "gt", - "val": 65 - }, - { - "index": 1, - "type": "qword", - "op": "lt", - "val": 80 - } - ] - }, - { - "syscall": "futex", - "args": [ - { - "index": 3, - "type": "qword", - "op": "ge", - "val": 65 - } - ] - }, - { - "syscall": "ioctl", - "args": [ - { - "index": 3, - "type": "dword", - "op": { - "masked_eq": 100 - }, - "val": 65 - } - ] - } - ] - }, - "thread_2": { - "default_action": "trap", - "filter_action": "allow", - "filter": [ - { - "syscall": "ioctl", - "args": [ - { - "index": 3, - "type": "dword", - "op": "eq", - "val": 65 - } - ] - } - ] - } - } - "#; - - #[test] - fn test_error_messages() { - let path = PathBuf::from("/path"); - assert_eq!( - format!( - "{}", - SeccompError::Bincode(BincodeError::new(bincode::ErrorKind::SizeLimit)) - ), - format!( - "Bincode (de)serialization failed: {}", - BincodeError::new(bincode::ErrorKind::SizeLimit) - ) - ); - assert_eq!( - format!( - "{}", - SeccompError::Compilation(FilterFormatError::SyscallName( - "dsaa".to_string(), - TargetArch::aarch64 - )) - ), - format!( - "{}", - FilterFormatError::SyscallName("dsaa".to_string(), TargetArch::aarch64) - ) - ); - assert_eq!( - format!( - "{}", - SeccompError::FileOpen(path.clone(), io::Error::from_raw_os_error(2)) - ), - format!( - "Failed to open file {:?}: {}", - path, - io::Error::from_raw_os_error(2) - ) - .replace('\"', "") - ); - assert_eq!( - format!( - "{}", - SeccompError::Json(serde_json::from_str::("").unwrap_err()) - ), - format!( - "Error parsing JSON: {}", - serde_json::from_str::("").unwrap_err() - ) - ); - assert_eq!( - format!("{}", SeccompError::MissingInputFile), - "Missing input file." - ); - assert_eq!( - format!("{}", SeccompError::MissingTargetArch), - "Missing target arch." 
- ); - assert_eq!( - format!( - "{}", - SeccompError::Arch(TargetArchError::InvalidString("lala".to_string())) - ), - format!("{}", TargetArchError::InvalidString("lala".to_string())) - ); - } - - #[test] - fn test_get_argument_values() { - let arg_parser = build_arg_parser(); - // correct arguments - let arguments = &mut arg_parser.arguments().clone(); - arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .unwrap(); - assert_eq!( - get_argument_values(arguments).unwrap(), - Arguments { - input_file: "foo.txt".to_string(), - output_file: DEFAULT_OUTPUT_FILENAME.to_string(), - target_arch: TargetArch::x86_64, - is_basic: false, - } - ); - - let arguments = &mut arg_parser.arguments().clone(); - arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64", - "--output-file", - "/path.to/file.txt", - "--basic", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .unwrap(); - assert_eq!( - get_argument_values(arguments).unwrap(), - Arguments { - input_file: "foo.txt".to_string(), - output_file: "/path.to/file.txt".to_string(), - target_arch: TargetArch::x86_64, - is_basic: true - } - ); - - // no args - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec!["seccompiler-bin"] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .is_err()); - - // missing --target-arch - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec!["seccompiler-bin", "--input-file", "foo.txt"] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .is_err()); - - // missing --input-file - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec!["seccompiler-bin", "--target-arch", "x86_64"] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) 
- .is_err()); - - // invalid --target-arch - let arguments = &mut arg_parser.arguments().clone(); - arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64das", - "--output-file", - "/path.to/file.txt", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .unwrap(); - get_argument_values(arguments).unwrap_err(); - - // invalid value supplied to --basic - let arguments = &mut arg_parser.arguments().clone(); - assert!(arguments - .parse( - vec![ - "seccompiler-bin", - "--input-file", - "foo.txt", - "--target-arch", - "x86_64", - "--basic", - "invalid", - ] - .into_iter() - .map(String::from) - .collect::>() - .as_ref(), - ) - .is_err()); - } - - #[allow(clippy::useless_asref)] - #[test] - fn test_compile() { - // --input-file was deleted - { - let mut in_file = TempFile::new().unwrap(); - in_file.remove().unwrap(); - let args = Arguments { - input_file: in_file.as_path().to_str().unwrap().to_string(), - target_arch: TargetArch::x86_64, - output_file: "bpf.out".to_string(), - is_basic: false, - }; - - match compile(&args).unwrap_err() { - SeccompError::FileOpen(buf, _) => assert_eq!(buf, PathBuf::from(in_file.as_path())), - _ => panic!("Expected FileOpen error."), - } - } - - // test a successful compilation - { - let in_file = TempFile::new().unwrap(); - let out_file = TempFile::new().unwrap(); - - in_file - .as_file() - .write_all(CORRECT_JSON_INPUT.as_bytes()) - .unwrap(); - - let arguments = Arguments { - input_file: in_file.as_path().to_str().unwrap().to_string(), - output_file: out_file.as_path().to_str().unwrap().to_string(), - target_arch: TargetArch::x86_64, - is_basic: false, - }; - - // do the compilation & check for errors - compile(&arguments).unwrap(); - - // also check with is_basic: true - let arguments = Arguments { - input_file: in_file.as_path().to_str().unwrap().to_string(), - output_file: out_file.as_path().to_str().unwrap().to_string(), - target_arch: TargetArch::x86_64, - 
is_basic: true, - }; - - // do the compilation & check for errors - compile(&arguments).unwrap(); - } - } -} diff --git a/src/seccompiler/src/syscall_table/aarch64.rs b/src/seccompiler/src/syscall_table/aarch64.rs deleted file mode 100644 index 386d09b78d3..00000000000 --- a/src/seccompiler/src/syscall_table/aarch64.rs +++ /dev/null @@ -1,308 +0,0 @@ -// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -// This file is auto-generated by `tools/devtool generate_syscall_tables`. -// Do NOT manually edit! -// Generated at: Mon 15 Nov 11:41:50 UTC 2021 -// Kernel version: 5.10 - -use std::collections::HashMap; - -pub fn make_syscall_table(map: &mut HashMap) { - map.insert("accept4".to_string(), 242); - map.insert("accept".to_string(), 202); - map.insert("acct".to_string(), 89); - map.insert("add_key".to_string(), 217); - map.insert("adjtimex".to_string(), 171); - map.insert("bind".to_string(), 200); - map.insert("bpf".to_string(), 280); - map.insert("brk".to_string(), 214); - map.insert("capget".to_string(), 90); - map.insert("capset".to_string(), 91); - map.insert("chdir".to_string(), 49); - map.insert("chroot".to_string(), 51); - map.insert("clock_adjtime".to_string(), 266); - map.insert("clock_getres".to_string(), 114); - map.insert("clock_gettime".to_string(), 113); - map.insert("clock_nanosleep".to_string(), 115); - map.insert("clock_settime".to_string(), 112); - map.insert("clone3".to_string(), 435); - map.insert("clone".to_string(), 220); - map.insert("close_range".to_string(), 436); - map.insert("close".to_string(), 57); - map.insert("connect".to_string(), 203); - map.insert("copy_file_range".to_string(), 285); - map.insert("delete_module".to_string(), 106); - map.insert("dup3".to_string(), 24); - map.insert("dup".to_string(), 23); - map.insert("epoll_create1".to_string(), 20); - map.insert("epoll_ctl".to_string(), 21); - map.insert("epoll_pwait".to_string(), 22); - 
map.insert("eventfd2".to_string(), 19); - map.insert("execveat".to_string(), 281); - map.insert("execve".to_string(), 221); - map.insert("exit_group".to_string(), 94); - map.insert("exit".to_string(), 93); - map.insert("faccessat2".to_string(), 439); - map.insert("faccessat".to_string(), 48); - map.insert("fadvise64".to_string(), 223); - map.insert("fallocate".to_string(), 47); - map.insert("fanotify_init".to_string(), 262); - map.insert("fanotify_mark".to_string(), 263); - map.insert("fchdir".to_string(), 50); - map.insert("fchmodat".to_string(), 53); - map.insert("fchmod".to_string(), 52); - map.insert("fchownat".to_string(), 54); - map.insert("fchown".to_string(), 55); - map.insert("fcntl".to_string(), 25); - map.insert("fdatasync".to_string(), 83); - map.insert("fgetxattr".to_string(), 10); - map.insert("finit_module".to_string(), 273); - map.insert("flistxattr".to_string(), 13); - map.insert("flock".to_string(), 32); - map.insert("fremovexattr".to_string(), 16); - map.insert("fsconfig".to_string(), 431); - map.insert("fsetxattr".to_string(), 7); - map.insert("fsmount".to_string(), 432); - map.insert("fsopen".to_string(), 430); - map.insert("fspick".to_string(), 433); - map.insert("fstatfs".to_string(), 44); - map.insert("fstat".to_string(), 80); - map.insert("fsync".to_string(), 82); - map.insert("ftruncate".to_string(), 46); - map.insert("futex".to_string(), 98); - map.insert("getcpu".to_string(), 168); - map.insert("getcwd".to_string(), 17); - map.insert("getdents64".to_string(), 61); - map.insert("getegid".to_string(), 177); - map.insert("geteuid".to_string(), 175); - map.insert("getgid".to_string(), 176); - map.insert("getgroups".to_string(), 158); - map.insert("getitimer".to_string(), 102); - map.insert("get_mempolicy".to_string(), 236); - map.insert("getpeername".to_string(), 205); - map.insert("getpgid".to_string(), 155); - map.insert("getpid".to_string(), 172); - map.insert("getppid".to_string(), 173); - map.insert("getpriority".to_string(), 141); - 
map.insert("getrandom".to_string(), 278); - map.insert("getresgid".to_string(), 150); - map.insert("getresuid".to_string(), 148); - map.insert("getrlimit".to_string(), 163); - map.insert("get_robust_list".to_string(), 100); - map.insert("getrusage".to_string(), 165); - map.insert("getsid".to_string(), 156); - map.insert("getsockname".to_string(), 204); - map.insert("getsockopt".to_string(), 209); - map.insert("gettid".to_string(), 178); - map.insert("gettimeofday".to_string(), 169); - map.insert("getuid".to_string(), 174); - map.insert("getxattr".to_string(), 8); - map.insert("init_module".to_string(), 105); - map.insert("inotify_add_watch".to_string(), 27); - map.insert("inotify_init1".to_string(), 26); - map.insert("inotify_rm_watch".to_string(), 28); - map.insert("io_cancel".to_string(), 3); - map.insert("ioctl".to_string(), 29); - map.insert("io_destroy".to_string(), 1); - map.insert("io_getevents".to_string(), 4); - map.insert("io_pgetevents".to_string(), 292); - map.insert("ioprio_get".to_string(), 31); - map.insert("ioprio_set".to_string(), 30); - map.insert("io_setup".to_string(), 0); - map.insert("io_submit".to_string(), 2); - map.insert("io_uring_enter".to_string(), 426); - map.insert("io_uring_register".to_string(), 427); - map.insert("io_uring_setup".to_string(), 425); - map.insert("kcmp".to_string(), 272); - map.insert("kexec_file_load".to_string(), 294); - map.insert("kexec_load".to_string(), 104); - map.insert("keyctl".to_string(), 219); - map.insert("kill".to_string(), 129); - map.insert("lgetxattr".to_string(), 9); - map.insert("linkat".to_string(), 37); - map.insert("listen".to_string(), 201); - map.insert("listxattr".to_string(), 11); - map.insert("llistxattr".to_string(), 12); - map.insert("lookup_dcookie".to_string(), 18); - map.insert("lremovexattr".to_string(), 15); - map.insert("lseek".to_string(), 62); - map.insert("lsetxattr".to_string(), 6); - map.insert("madvise".to_string(), 233); - map.insert("mbind".to_string(), 235); - 
map.insert("membarrier".to_string(), 283); - map.insert("memfd_create".to_string(), 279); - map.insert("migrate_pages".to_string(), 238); - map.insert("mincore".to_string(), 232); - map.insert("mkdirat".to_string(), 34); - map.insert("mknodat".to_string(), 33); - map.insert("mlock2".to_string(), 284); - map.insert("mlockall".to_string(), 230); - map.insert("mlock".to_string(), 228); - map.insert("mmap".to_string(), 222); - map.insert("mount".to_string(), 40); - map.insert("move_mount".to_string(), 429); - map.insert("move_pages".to_string(), 239); - map.insert("mprotect".to_string(), 226); - map.insert("mq_getsetattr".to_string(), 185); - map.insert("mq_notify".to_string(), 184); - map.insert("mq_open".to_string(), 180); - map.insert("mq_timedreceive".to_string(), 183); - map.insert("mq_timedsend".to_string(), 182); - map.insert("mq_unlink".to_string(), 181); - map.insert("mremap".to_string(), 216); - map.insert("msgctl".to_string(), 187); - map.insert("msgget".to_string(), 186); - map.insert("msgrcv".to_string(), 188); - map.insert("msgsnd".to_string(), 189); - map.insert("msync".to_string(), 227); - map.insert("munlockall".to_string(), 231); - map.insert("munlock".to_string(), 229); - map.insert("munmap".to_string(), 215); - map.insert("name_to_handle_at".to_string(), 264); - map.insert("nanosleep".to_string(), 101); - map.insert("newfstatat".to_string(), 79); - map.insert("nfsservctl".to_string(), 42); - map.insert("openat2".to_string(), 437); - map.insert("openat".to_string(), 56); - map.insert("open_by_handle_at".to_string(), 265); - map.insert("open_tree".to_string(), 428); - map.insert("perf_event_open".to_string(), 241); - map.insert("personality".to_string(), 92); - map.insert("pidfd_getfd".to_string(), 438); - map.insert("pidfd_open".to_string(), 434); - map.insert("pidfd_send_signal".to_string(), 424); - map.insert("pipe2".to_string(), 59); - map.insert("pivot_root".to_string(), 41); - map.insert("pkey_alloc".to_string(), 289); - 
map.insert("pkey_free".to_string(), 290); - map.insert("pkey_mprotect".to_string(), 288); - map.insert("ppoll".to_string(), 73); - map.insert("prctl".to_string(), 167); - map.insert("pread64".to_string(), 67); - map.insert("preadv2".to_string(), 286); - map.insert("preadv".to_string(), 69); - map.insert("prlimit64".to_string(), 261); - map.insert("process_madvise".to_string(), 440); - map.insert("process_vm_readv".to_string(), 270); - map.insert("process_vm_writev".to_string(), 271); - map.insert("pselect6".to_string(), 72); - map.insert("ptrace".to_string(), 117); - map.insert("pwrite64".to_string(), 68); - map.insert("pwritev2".to_string(), 287); - map.insert("pwritev".to_string(), 70); - map.insert("quotactl".to_string(), 60); - map.insert("readahead".to_string(), 213); - map.insert("readlinkat".to_string(), 78); - map.insert("read".to_string(), 63); - map.insert("readv".to_string(), 65); - map.insert("reboot".to_string(), 142); - map.insert("recvfrom".to_string(), 207); - map.insert("recvmmsg".to_string(), 243); - map.insert("recvmsg".to_string(), 212); - map.insert("remap_file_pages".to_string(), 234); - map.insert("removexattr".to_string(), 14); - map.insert("renameat2".to_string(), 276); - map.insert("renameat".to_string(), 38); - map.insert("request_key".to_string(), 218); - map.insert("restart_syscall".to_string(), 128); - map.insert("rseq".to_string(), 293); - map.insert("rt_sigaction".to_string(), 134); - map.insert("rt_sigpending".to_string(), 136); - map.insert("rt_sigprocmask".to_string(), 135); - map.insert("rt_sigqueueinfo".to_string(), 138); - map.insert("rt_sigreturn".to_string(), 139); - map.insert("rt_sigsuspend".to_string(), 133); - map.insert("rt_sigtimedwait".to_string(), 137); - map.insert("rt_tgsigqueueinfo".to_string(), 240); - map.insert("sched_getaffinity".to_string(), 123); - map.insert("sched_getattr".to_string(), 275); - map.insert("sched_getparam".to_string(), 121); - map.insert("sched_get_priority_max".to_string(), 125); - 
map.insert("sched_get_priority_min".to_string(), 126); - map.insert("sched_getscheduler".to_string(), 120); - map.insert("sched_rr_get_interval".to_string(), 127); - map.insert("sched_setaffinity".to_string(), 122); - map.insert("sched_setattr".to_string(), 274); - map.insert("sched_setparam".to_string(), 118); - map.insert("sched_setscheduler".to_string(), 119); - map.insert("sched_yield".to_string(), 124); - map.insert("seccomp".to_string(), 277); - map.insert("semctl".to_string(), 191); - map.insert("semget".to_string(), 190); - map.insert("semop".to_string(), 193); - map.insert("semtimedop".to_string(), 192); - map.insert("sendfile".to_string(), 71); - map.insert("sendmmsg".to_string(), 269); - map.insert("sendmsg".to_string(), 211); - map.insert("sendto".to_string(), 206); - map.insert("setdomainname".to_string(), 162); - map.insert("setfsgid".to_string(), 152); - map.insert("setfsuid".to_string(), 151); - map.insert("setgid".to_string(), 144); - map.insert("setgroups".to_string(), 159); - map.insert("sethostname".to_string(), 161); - map.insert("setitimer".to_string(), 103); - map.insert("set_mempolicy".to_string(), 237); - map.insert("setns".to_string(), 268); - map.insert("setpgid".to_string(), 154); - map.insert("setpriority".to_string(), 140); - map.insert("setregid".to_string(), 143); - map.insert("setresgid".to_string(), 149); - map.insert("setresuid".to_string(), 147); - map.insert("setreuid".to_string(), 145); - map.insert("setrlimit".to_string(), 164); - map.insert("set_robust_list".to_string(), 99); - map.insert("setsid".to_string(), 157); - map.insert("setsockopt".to_string(), 208); - map.insert("set_tid_address".to_string(), 96); - map.insert("settimeofday".to_string(), 170); - map.insert("setuid".to_string(), 146); - map.insert("setxattr".to_string(), 5); - map.insert("shmat".to_string(), 196); - map.insert("shmctl".to_string(), 195); - map.insert("shmdt".to_string(), 197); - map.insert("shmget".to_string(), 194); - 
map.insert("shutdown".to_string(), 210); - map.insert("sigaltstack".to_string(), 132); - map.insert("signalfd4".to_string(), 74); - map.insert("socketpair".to_string(), 199); - map.insert("socket".to_string(), 198); - map.insert("splice".to_string(), 76); - map.insert("statfs".to_string(), 43); - map.insert("statx".to_string(), 291); - map.insert("swapoff".to_string(), 225); - map.insert("swapon".to_string(), 224); - map.insert("symlinkat".to_string(), 36); - map.insert("sync_file_range".to_string(), 84); - map.insert("syncfs".to_string(), 267); - map.insert("sync".to_string(), 81); - map.insert("sysinfo".to_string(), 179); - map.insert("syslog".to_string(), 116); - map.insert("tee".to_string(), 77); - map.insert("tgkill".to_string(), 131); - map.insert("timer_create".to_string(), 107); - map.insert("timer_delete".to_string(), 111); - map.insert("timerfd_create".to_string(), 85); - map.insert("timerfd_gettime".to_string(), 87); - map.insert("timerfd_settime".to_string(), 86); - map.insert("timer_getoverrun".to_string(), 109); - map.insert("timer_gettime".to_string(), 108); - map.insert("timer_settime".to_string(), 110); - map.insert("times".to_string(), 153); - map.insert("tkill".to_string(), 130); - map.insert("truncate".to_string(), 45); - map.insert("umask".to_string(), 166); - map.insert("umount2".to_string(), 39); - map.insert("uname".to_string(), 160); - map.insert("unlinkat".to_string(), 35); - map.insert("unshare".to_string(), 97); - map.insert("userfaultfd".to_string(), 282); - map.insert("utimensat".to_string(), 88); - map.insert("vhangup".to_string(), 58); - map.insert("vmsplice".to_string(), 75); - map.insert("wait4".to_string(), 260); - map.insert("waitid".to_string(), 95); - map.insert("write".to_string(), 64); - map.insert("writev".to_string(), 66); -} diff --git a/src/seccompiler/src/syscall_table/mod.rs b/src/seccompiler/src/syscall_table/mod.rs deleted file mode 100644 index 3dca50c748d..00000000000 --- a/src/seccompiler/src/syscall_table/mod.rs 
+++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -mod aarch64; -mod x86_64; - -use std::collections::HashMap; - -use crate::backend::TargetArch; - -/// Creates and owns a mapping from the arch-specific syscall name to the right number. -#[derive(Debug)] -pub struct SyscallTable { - map: HashMap, - arch: TargetArch, -} - -/// Number of syscalls for x86_64 (rough upper bound). -const MAP_CAPACITY: usize = 351; - -impl SyscallTable { - /// Create new syscall table - pub fn new(arch: TargetArch) -> Self { - let mut instance = Self { - arch, - map: HashMap::with_capacity(MAP_CAPACITY), - }; - - instance.populate_map(); - - instance - } - - /// Returns the arch-specific syscall number based on the given name. - pub fn get_syscall_nr(&self, sys_name: &str) -> Option { - self.map.get(sys_name).copied() - } - - /// Populates the arch-specific syscall map. - fn populate_map(&mut self) { - match self.arch { - TargetArch::aarch64 => aarch64::make_syscall_table(&mut self.map), - TargetArch::x86_64 => x86_64::make_syscall_table(&mut self.map), - } - } -} - -#[cfg(test)] -mod tests { - use super::SyscallTable; - use crate::backend::TargetArch; - - #[test] - fn test_get_syscall_nr() { - // get number for a valid syscall - let instance_x86_64 = SyscallTable::new(TargetArch::x86_64); - let instance_aarch64 = SyscallTable::new(TargetArch::aarch64); - - assert_eq!(instance_x86_64.get_syscall_nr("close").unwrap(), 3); - assert_eq!(instance_aarch64.get_syscall_nr("close").unwrap(), 57); - - // invalid syscall name - assert!(instance_x86_64.get_syscall_nr("nosyscall").is_none()); - assert!(instance_aarch64.get_syscall_nr("nosyscall").is_none()); - } -} diff --git a/src/seccompiler/src/syscall_table/x86_64.rs b/src/seccompiler/src/syscall_table/x86_64.rs deleted file mode 100644 index 9350bd5ce57..00000000000 --- a/src/seccompiler/src/syscall_table/x86_64.rs +++ /dev/null @@ -1,364 +0,0 @@ 
-// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -// This file is auto-generated by `tools/devtool generate_syscall_tables`. -// Do NOT manually edit! -// Generated at: Mon 15 Nov 11:41:50 UTC 2021 -// Kernel version: 5.10 - -use std::collections::HashMap; - -pub fn make_syscall_table(map: &mut HashMap) { - map.insert("accept4".to_string(), 288); - map.insert("accept".to_string(), 43); - map.insert("access".to_string(), 21); - map.insert("acct".to_string(), 163); - map.insert("add_key".to_string(), 248); - map.insert("adjtimex".to_string(), 159); - map.insert("afs_syscall".to_string(), 183); - map.insert("alarm".to_string(), 37); - map.insert("arch_prctl".to_string(), 158); - map.insert("bind".to_string(), 49); - map.insert("bpf".to_string(), 321); - map.insert("brk".to_string(), 12); - map.insert("capget".to_string(), 125); - map.insert("capset".to_string(), 126); - map.insert("chdir".to_string(), 80); - map.insert("chmod".to_string(), 90); - map.insert("chown".to_string(), 92); - map.insert("chroot".to_string(), 161); - map.insert("clock_adjtime".to_string(), 305); - map.insert("clock_getres".to_string(), 229); - map.insert("clock_gettime".to_string(), 228); - map.insert("clock_nanosleep".to_string(), 230); - map.insert("clock_settime".to_string(), 227); - map.insert("clone3".to_string(), 435); - map.insert("clone".to_string(), 56); - map.insert("close_range".to_string(), 436); - map.insert("close".to_string(), 3); - map.insert("connect".to_string(), 42); - map.insert("copy_file_range".to_string(), 326); - map.insert("create_module".to_string(), 174); - map.insert("creat".to_string(), 85); - map.insert("delete_module".to_string(), 176); - map.insert("dup2".to_string(), 33); - map.insert("dup3".to_string(), 292); - map.insert("dup".to_string(), 32); - map.insert("epoll_create1".to_string(), 291); - map.insert("epoll_create".to_string(), 213); - map.insert("epoll_ctl_old".to_string(), 214); - 
map.insert("epoll_ctl".to_string(), 233); - map.insert("epoll_pwait".to_string(), 281); - map.insert("epoll_wait_old".to_string(), 215); - map.insert("epoll_wait".to_string(), 232); - map.insert("eventfd2".to_string(), 290); - map.insert("eventfd".to_string(), 284); - map.insert("execveat".to_string(), 322); - map.insert("execve".to_string(), 59); - map.insert("exit_group".to_string(), 231); - map.insert("exit".to_string(), 60); - map.insert("faccessat2".to_string(), 439); - map.insert("faccessat".to_string(), 269); - map.insert("fadvise64".to_string(), 221); - map.insert("fallocate".to_string(), 285); - map.insert("fanotify_init".to_string(), 300); - map.insert("fanotify_mark".to_string(), 301); - map.insert("fchdir".to_string(), 81); - map.insert("fchmodat".to_string(), 268); - map.insert("fchmod".to_string(), 91); - map.insert("fchownat".to_string(), 260); - map.insert("fchown".to_string(), 93); - map.insert("fcntl".to_string(), 72); - map.insert("fdatasync".to_string(), 75); - map.insert("fgetxattr".to_string(), 193); - map.insert("finit_module".to_string(), 313); - map.insert("flistxattr".to_string(), 196); - map.insert("flock".to_string(), 73); - map.insert("fork".to_string(), 57); - map.insert("fremovexattr".to_string(), 199); - map.insert("fsconfig".to_string(), 431); - map.insert("fsetxattr".to_string(), 190); - map.insert("fsmount".to_string(), 432); - map.insert("fsopen".to_string(), 430); - map.insert("fspick".to_string(), 433); - map.insert("fstatfs".to_string(), 138); - map.insert("fstat".to_string(), 5); - map.insert("fsync".to_string(), 74); - map.insert("ftruncate".to_string(), 77); - map.insert("futex".to_string(), 202); - map.insert("futimesat".to_string(), 261); - map.insert("getcpu".to_string(), 309); - map.insert("getcwd".to_string(), 79); - map.insert("getdents64".to_string(), 217); - map.insert("getdents".to_string(), 78); - map.insert("getegid".to_string(), 108); - map.insert("geteuid".to_string(), 107); - map.insert("getgid".to_string(), 
104); - map.insert("getgroups".to_string(), 115); - map.insert("getitimer".to_string(), 36); - map.insert("get_kernel_syms".to_string(), 177); - map.insert("get_mempolicy".to_string(), 239); - map.insert("getpeername".to_string(), 52); - map.insert("getpgid".to_string(), 121); - map.insert("getpgrp".to_string(), 111); - map.insert("getpid".to_string(), 39); - map.insert("getpmsg".to_string(), 181); - map.insert("getppid".to_string(), 110); - map.insert("getpriority".to_string(), 140); - map.insert("getrandom".to_string(), 318); - map.insert("getresgid".to_string(), 120); - map.insert("getresuid".to_string(), 118); - map.insert("getrlimit".to_string(), 97); - map.insert("get_robust_list".to_string(), 274); - map.insert("getrusage".to_string(), 98); - map.insert("getsid".to_string(), 124); - map.insert("getsockname".to_string(), 51); - map.insert("getsockopt".to_string(), 55); - map.insert("get_thread_area".to_string(), 211); - map.insert("gettid".to_string(), 186); - map.insert("gettimeofday".to_string(), 96); - map.insert("getuid".to_string(), 102); - map.insert("getxattr".to_string(), 191); - map.insert("init_module".to_string(), 175); - map.insert("inotify_add_watch".to_string(), 254); - map.insert("inotify_init1".to_string(), 294); - map.insert("inotify_init".to_string(), 253); - map.insert("inotify_rm_watch".to_string(), 255); - map.insert("io_cancel".to_string(), 210); - map.insert("ioctl".to_string(), 16); - map.insert("io_destroy".to_string(), 207); - map.insert("io_getevents".to_string(), 208); - map.insert("ioperm".to_string(), 173); - map.insert("io_pgetevents".to_string(), 333); - map.insert("iopl".to_string(), 172); - map.insert("ioprio_get".to_string(), 252); - map.insert("ioprio_set".to_string(), 251); - map.insert("io_setup".to_string(), 206); - map.insert("io_submit".to_string(), 209); - map.insert("io_uring_enter".to_string(), 426); - map.insert("io_uring_register".to_string(), 427); - map.insert("io_uring_setup".to_string(), 425); - 
map.insert("kcmp".to_string(), 312); - map.insert("kexec_file_load".to_string(), 320); - map.insert("kexec_load".to_string(), 246); - map.insert("keyctl".to_string(), 250); - map.insert("kill".to_string(), 62); - map.insert("lchown".to_string(), 94); - map.insert("lgetxattr".to_string(), 192); - map.insert("linkat".to_string(), 265); - map.insert("link".to_string(), 86); - map.insert("listen".to_string(), 50); - map.insert("listxattr".to_string(), 194); - map.insert("llistxattr".to_string(), 195); - map.insert("lookup_dcookie".to_string(), 212); - map.insert("lremovexattr".to_string(), 198); - map.insert("lseek".to_string(), 8); - map.insert("lsetxattr".to_string(), 189); - map.insert("lstat".to_string(), 6); - map.insert("madvise".to_string(), 28); - map.insert("mbind".to_string(), 237); - map.insert("membarrier".to_string(), 324); - map.insert("memfd_create".to_string(), 319); - map.insert("migrate_pages".to_string(), 256); - map.insert("mincore".to_string(), 27); - map.insert("mkdirat".to_string(), 258); - map.insert("mkdir".to_string(), 83); - map.insert("mknodat".to_string(), 259); - map.insert("mknod".to_string(), 133); - map.insert("mlock2".to_string(), 325); - map.insert("mlockall".to_string(), 151); - map.insert("mlock".to_string(), 149); - map.insert("mmap".to_string(), 9); - map.insert("modify_ldt".to_string(), 154); - map.insert("mount".to_string(), 165); - map.insert("move_mount".to_string(), 429); - map.insert("move_pages".to_string(), 279); - map.insert("mprotect".to_string(), 10); - map.insert("mq_getsetattr".to_string(), 245); - map.insert("mq_notify".to_string(), 244); - map.insert("mq_open".to_string(), 240); - map.insert("mq_timedreceive".to_string(), 243); - map.insert("mq_timedsend".to_string(), 242); - map.insert("mq_unlink".to_string(), 241); - map.insert("mremap".to_string(), 25); - map.insert("msgctl".to_string(), 71); - map.insert("msgget".to_string(), 68); - map.insert("msgrcv".to_string(), 70); - map.insert("msgsnd".to_string(), 69); - 
map.insert("msync".to_string(), 26); - map.insert("munlockall".to_string(), 152); - map.insert("munlock".to_string(), 150); - map.insert("munmap".to_string(), 11); - map.insert("name_to_handle_at".to_string(), 303); - map.insert("nanosleep".to_string(), 35); - map.insert("newfstatat".to_string(), 262); - map.insert("nfsservctl".to_string(), 180); - map.insert("openat2".to_string(), 437); - map.insert("openat".to_string(), 257); - map.insert("open_by_handle_at".to_string(), 304); - map.insert("open".to_string(), 2); - map.insert("open_tree".to_string(), 428); - map.insert("pause".to_string(), 34); - map.insert("perf_event_open".to_string(), 298); - map.insert("personality".to_string(), 135); - map.insert("pidfd_getfd".to_string(), 438); - map.insert("pidfd_open".to_string(), 434); - map.insert("pidfd_send_signal".to_string(), 424); - map.insert("pipe2".to_string(), 293); - map.insert("pipe".to_string(), 22); - map.insert("pivot_root".to_string(), 155); - map.insert("pkey_alloc".to_string(), 330); - map.insert("pkey_free".to_string(), 331); - map.insert("pkey_mprotect".to_string(), 329); - map.insert("poll".to_string(), 7); - map.insert("ppoll".to_string(), 271); - map.insert("prctl".to_string(), 157); - map.insert("pread64".to_string(), 17); - map.insert("preadv2".to_string(), 327); - map.insert("preadv".to_string(), 295); - map.insert("prlimit64".to_string(), 302); - map.insert("process_madvise".to_string(), 440); - map.insert("process_vm_readv".to_string(), 310); - map.insert("process_vm_writev".to_string(), 311); - map.insert("pselect6".to_string(), 270); - map.insert("ptrace".to_string(), 101); - map.insert("putpmsg".to_string(), 182); - map.insert("pwrite64".to_string(), 18); - map.insert("pwritev2".to_string(), 328); - map.insert("pwritev".to_string(), 296); - map.insert("query_module".to_string(), 178); - map.insert("quotactl".to_string(), 179); - map.insert("readahead".to_string(), 187); - map.insert("readlinkat".to_string(), 267); - 
map.insert("readlink".to_string(), 89); - map.insert("read".to_string(), 0); - map.insert("readv".to_string(), 19); - map.insert("reboot".to_string(), 169); - map.insert("recvfrom".to_string(), 45); - map.insert("recvmmsg".to_string(), 299); - map.insert("recvmsg".to_string(), 47); - map.insert("remap_file_pages".to_string(), 216); - map.insert("removexattr".to_string(), 197); - map.insert("renameat2".to_string(), 316); - map.insert("renameat".to_string(), 264); - map.insert("rename".to_string(), 82); - map.insert("request_key".to_string(), 249); - map.insert("restart_syscall".to_string(), 219); - map.insert("rmdir".to_string(), 84); - map.insert("rseq".to_string(), 334); - map.insert("rt_sigaction".to_string(), 13); - map.insert("rt_sigpending".to_string(), 127); - map.insert("rt_sigprocmask".to_string(), 14); - map.insert("rt_sigqueueinfo".to_string(), 129); - map.insert("rt_sigreturn".to_string(), 15); - map.insert("rt_sigsuspend".to_string(), 130); - map.insert("rt_sigtimedwait".to_string(), 128); - map.insert("rt_tgsigqueueinfo".to_string(), 297); - map.insert("sched_getaffinity".to_string(), 204); - map.insert("sched_getattr".to_string(), 315); - map.insert("sched_getparam".to_string(), 143); - map.insert("sched_get_priority_max".to_string(), 146); - map.insert("sched_get_priority_min".to_string(), 147); - map.insert("sched_getscheduler".to_string(), 145); - map.insert("sched_rr_get_interval".to_string(), 148); - map.insert("sched_setaffinity".to_string(), 203); - map.insert("sched_setattr".to_string(), 314); - map.insert("sched_setparam".to_string(), 142); - map.insert("sched_setscheduler".to_string(), 144); - map.insert("sched_yield".to_string(), 24); - map.insert("seccomp".to_string(), 317); - map.insert("security".to_string(), 185); - map.insert("select".to_string(), 23); - map.insert("semctl".to_string(), 66); - map.insert("semget".to_string(), 64); - map.insert("semop".to_string(), 65); - map.insert("semtimedop".to_string(), 220); - 
map.insert("sendfile".to_string(), 40); - map.insert("sendmmsg".to_string(), 307); - map.insert("sendmsg".to_string(), 46); - map.insert("sendto".to_string(), 44); - map.insert("setdomainname".to_string(), 171); - map.insert("setfsgid".to_string(), 123); - map.insert("setfsuid".to_string(), 122); - map.insert("setgid".to_string(), 106); - map.insert("setgroups".to_string(), 116); - map.insert("sethostname".to_string(), 170); - map.insert("setitimer".to_string(), 38); - map.insert("set_mempolicy".to_string(), 238); - map.insert("setns".to_string(), 308); - map.insert("setpgid".to_string(), 109); - map.insert("setpriority".to_string(), 141); - map.insert("setregid".to_string(), 114); - map.insert("setresgid".to_string(), 119); - map.insert("setresuid".to_string(), 117); - map.insert("setreuid".to_string(), 113); - map.insert("setrlimit".to_string(), 160); - map.insert("set_robust_list".to_string(), 273); - map.insert("setsid".to_string(), 112); - map.insert("setsockopt".to_string(), 54); - map.insert("set_thread_area".to_string(), 205); - map.insert("set_tid_address".to_string(), 218); - map.insert("settimeofday".to_string(), 164); - map.insert("setuid".to_string(), 105); - map.insert("setxattr".to_string(), 188); - map.insert("shmat".to_string(), 30); - map.insert("shmctl".to_string(), 31); - map.insert("shmdt".to_string(), 67); - map.insert("shmget".to_string(), 29); - map.insert("shutdown".to_string(), 48); - map.insert("sigaltstack".to_string(), 131); - map.insert("signalfd4".to_string(), 289); - map.insert("signalfd".to_string(), 282); - map.insert("socketpair".to_string(), 53); - map.insert("socket".to_string(), 41); - map.insert("splice".to_string(), 275); - map.insert("statfs".to_string(), 137); - map.insert("stat".to_string(), 4); - map.insert("statx".to_string(), 332); - map.insert("swapoff".to_string(), 168); - map.insert("swapon".to_string(), 167); - map.insert("symlinkat".to_string(), 266); - map.insert("symlink".to_string(), 88); - 
map.insert("sync_file_range".to_string(), 277); - map.insert("syncfs".to_string(), 306); - map.insert("sync".to_string(), 162); - map.insert("_sysctl".to_string(), 156); - map.insert("sysfs".to_string(), 139); - map.insert("sysinfo".to_string(), 99); - map.insert("syslog".to_string(), 103); - map.insert("tee".to_string(), 276); - map.insert("tgkill".to_string(), 234); - map.insert("timer_create".to_string(), 222); - map.insert("timer_delete".to_string(), 226); - map.insert("timerfd_create".to_string(), 283); - map.insert("timerfd_gettime".to_string(), 287); - map.insert("timerfd_settime".to_string(), 286); - map.insert("timer_getoverrun".to_string(), 225); - map.insert("timer_gettime".to_string(), 224); - map.insert("timer_settime".to_string(), 223); - map.insert("times".to_string(), 100); - map.insert("time".to_string(), 201); - map.insert("tkill".to_string(), 200); - map.insert("truncate".to_string(), 76); - map.insert("tuxcall".to_string(), 184); - map.insert("umask".to_string(), 95); - map.insert("umount2".to_string(), 166); - map.insert("uname".to_string(), 63); - map.insert("unlinkat".to_string(), 263); - map.insert("unlink".to_string(), 87); - map.insert("unshare".to_string(), 272); - map.insert("uselib".to_string(), 134); - map.insert("userfaultfd".to_string(), 323); - map.insert("ustat".to_string(), 136); - map.insert("utimensat".to_string(), 280); - map.insert("utimes".to_string(), 235); - map.insert("utime".to_string(), 132); - map.insert("vfork".to_string(), 58); - map.insert("vhangup".to_string(), 153); - map.insert("vmsplice".to_string(), 278); - map.insert("vserver".to_string(), 236); - map.insert("wait4".to_string(), 61); - map.insert("waitid".to_string(), 247); - map.insert("write".to_string(), 1); - map.insert("writev".to_string(), 20); -} diff --git a/src/seccompiler/src/types.rs b/src/seccompiler/src/types.rs new file mode 100644 index 00000000000..2035f8b8ea4 --- /dev/null +++ b/src/seccompiler/src/types.rs @@ -0,0 +1,192 @@ +// Copyright 2024 
Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::BTreeMap; +use std::ffi::CString; +use std::str::FromStr; + +use serde::*; + +// use libseccomp::{ScmpAction, ScmpArch, ScmpCompareOp}; +use crate::bindings::*; + +/// Comparison to perform when matching a condition. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SeccompCmpOp { + Eq, + Ge, + Gt, + Le, + Lt, + MaskedEq(u64), + Ne, +} + +/// Seccomp argument value length. +#[derive(Clone, Debug, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum SeccompCmpArgLen { + /// Argument value length is 4 bytes. + Dword, + /// Argument value length is 8 bytes. + Qword, +} + +/// Condition that syscall must match in order to satisfy a rule. +#[derive(Debug, Deserialize)] +pub struct SeccompCondition { + pub index: u8, + pub op: SeccompCmpOp, + pub val: u64, + #[serde(rename = "type")] + pub val_len: SeccompCmpArgLen, +} + +impl SeccompCondition { + pub fn to_scmp_type(&self) -> scmp_arg_cmp { + match self.op { + SeccompCmpOp::Eq => { + // When using EQ libseccomp compares the whole 64 bits. In + // general this is not a problem, but for example we have + // observed musl `ioctl` to leave garbage in the upper bits of + // the `request` argument. There is a GH issue to allow 32bit + // comparisons (see + // https://github.com/seccomp/libseccomp/issues/383) but is not + // merged yet. Until that is available, do a masked comparison + // with the upper 32bits set to 0, so we will compare that `hi32 + // & 0x0 == 0`, which is always true. This costs one additional + // instruction, but will be likely be optimized away by the BPF + // JIT. 
+ match self.val_len { + SeccompCmpArgLen::Dword => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_MASKED_EQ, + datum_a: 0x00000000FFFFFFFF, + datum_b: self.val, + }, + SeccompCmpArgLen::Qword => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_EQ, + datum_a: self.val, + datum_b: 0, + }, + } + } + SeccompCmpOp::Ge => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_GE, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Gt => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_GT, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Le => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_LE, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Lt => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_LT, + datum_a: self.val, + datum_b: 0, + }, + SeccompCmpOp::Ne => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_NE, + datum_a: self.val, + datum_b: 0, + }, + + SeccompCmpOp::MaskedEq(m) => scmp_arg_cmp { + arg: self.index as u32, + op: scmp_compare::SCMP_CMP_MASKED_EQ, + datum_a: m, + datum_b: self.val, + }, + } + } +} + +/// Actions that `seccomp` can apply to process calling a syscall. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SeccompAction { + Allow, + Errno(u16), + KillThread, + KillProcess, + Log, + Trace(u16), + Trap, +} + +impl SeccompAction { + pub fn to_scmp_type(&self) -> u32 { + match self { + SeccompAction::Allow => SCMP_ACT_ALLOW, + SeccompAction::Errno(e) => SCMP_ACT_ERRNO(*e), + SeccompAction::KillThread => SCMP_ACT_KILL_THREAD, + SeccompAction::KillProcess => SCMP_ACT_KILL_PROCESS, + SeccompAction::Log => SCMP_ACT_LOG, + SeccompAction::Trace(t) => SCMP_ACT_TRACE(*t), + SeccompAction::Trap => SCMP_ACT_TRAP, + } + } +} + +/// Rule that `seccomp` attempts to match for a syscall. +/// +/// If all conditions match then rule gets matched. 
+/// The action of the first rule that matches will be applied to the calling process. +/// If no rule matches the default action is applied. +#[derive(Debug, Deserialize)] +pub struct SyscallRule { + pub syscall: CString, + pub args: Option>, +} + +/// Filter containing rules assigned to syscall numbers. +#[derive(Debug, Deserialize)] +pub struct Filter { + pub default_action: SeccompAction, + pub filter_action: SeccompAction, + pub filter: Vec, +} + +/// Deserializable object that represents the Json filter file. +#[derive(Debug, Deserialize)] +pub struct BpfJson(pub BTreeMap); + +/// Supported target architectures. +#[derive(Debug)] +pub enum TargetArch { + X86_64, + Aarch64, +} + +impl TargetArch { + pub fn to_scmp_type(&self) -> u32 { + match self { + TargetArch::X86_64 => SCMP_ARCH_X86_64, + TargetArch::Aarch64 => SCMP_ARCH_AARCH64, + } + } +} + +impl FromStr for TargetArch { + type Err = String; + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "x86_64" => Ok(TargetArch::X86_64), + "aarch64" => Ok(TargetArch::Aarch64), + _ => Err(s.to_string()), + } + } +} diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 8773198c002..4e1cf8eed73 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -31,7 +31,6 @@ log-instrument = { path = "../log-instrument", optional = true } memfd = "0.6.3" micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } -seccompiler = { path = "../seccompiler" } semver = { version = "1.0.24", features = ["serde"] } serde = { version = "1.0.217", features = ["derive", "rc"] } serde_json = "1.0.135" diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 6131e248d91..82bfa635032 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -19,7 +19,6 @@ use linux_loader::loader::elf::Elf as Loader; #[cfg(target_arch = "aarch64")] use linux_loader::loader::pe::PE as Loader; use linux_loader::loader::KernelLoader; -use seccompiler::BpfThreadMap; use userfaultfd::Uffd; use 
utils::time::TimestampUs; use vm_memory::ReadVolatile; @@ -63,6 +62,7 @@ use crate::gdb; use crate::logger::{debug, error}; use crate::persist::{MicrovmState, MicrovmStateError}; use crate::resources::VmResources; +use crate::seccomp::BpfThreadMap; use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vmm_config::boot_source::BootConfig; @@ -372,7 +372,7 @@ pub fn build_microvm_for_boot( // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. // Keep this as the last step before resuming vcpus. - seccompiler::apply_filter( + crate::seccomp::apply_filter( seccomp_filters .get("vmm") .ok_or_else(|| MissingSeccompFilters("vmm".to_string()))?, @@ -443,7 +443,7 @@ pub enum BuildMicrovmFromSnapshotError { /// Failed to apply VMM secccomp filter as none found. MissingVmmSeccompFilters, /// Failed to apply VMM secccomp filter: {0} - SeccompFiltersInternal(#[from] seccompiler::InstallationError), + SeccompFiltersInternal(#[from] crate::seccomp::InstallationError), /// Failed to restore ACPI device manager: {0} ACPIDeviManager(#[from] ACPIDeviceManagerRestoreError), /// VMGenID update failed: {0} @@ -559,7 +559,7 @@ pub fn build_microvm_from_snapshot( // Load seccomp filters for the VMM thread. // Keep this as the last step of the building process. - seccompiler::apply_filter( + crate::seccomp::apply_filter( seccomp_filters .get("vmm") .ok_or(BuildMicrovmFromSnapshotError::MissingVmmSeccompFilters)?, diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 6a28e14f26b..77c0018c55a 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -97,7 +97,7 @@ pub mod resources; /// microVM RPC API adapters. pub mod rpc_interface; /// Seccomp filter utilities. -pub mod seccomp_filters; +pub mod seccomp; /// Signal handling utilities. 
pub mod signal_handler; /// Serialization and deserialization facilities @@ -122,7 +122,7 @@ use device_manager::acpi::ACPIDeviceManager; use device_manager::resources::ResourceAllocator; use devices::acpi::vmgenid::VmGenIdError; use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber}; -use seccompiler::BpfProgram; +use seccomp::BpfProgram; use userfaultfd::Uffd; use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; @@ -225,7 +225,7 @@ pub enum VmmError { /// Cannot add a device to the MMIO Bus. {0} RegisterMMIODevice(device_manager::mmio::MmioError), /// Cannot install seccomp filters: {0} - SeccompFilters(seccompiler::InstallationError), + SeccompFilters(seccomp::InstallationError), /// Error writing to the serial console: {0} Serial(io::Error), /// Error creating timer fd: {0} diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 3479e0b6309..1feef41ec30 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -11,7 +11,6 @@ use std::os::unix::net::UnixStream; use std::path::Path; use std::sync::{Arc, Mutex}; -use seccompiler::BpfThreadMap; use semver::Version; use serde::{Deserialize, Serialize}; use userfaultfd::{FeatureFlags, Uffd, UffdBuilder}; @@ -28,6 +27,7 @@ use crate::cpu_config::x86_64::cpuid::CpuidTrait; use crate::device_manager::persist::{ACPIDeviceManagerState, DevicePersistError, DeviceStates}; use crate::logger::{info, warn}; use crate::resources::VmResources; +use crate::seccomp::BpfThreadMap; use crate::snapshot::Snapshot; use crate::utils::u64_to_usize; use crate::vmm_config::boot_source::BootSourceConfig; diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index b0c95529f46..60a046f7e89 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -4,7 +4,6 @@ use std::fmt::{self, Debug}; use std::sync::{Arc, Mutex, MutexGuard}; -use seccompiler::BpfThreadMap; use serde_json::Value; use utils::time::{get_time_us, ClockType}; @@ 
-18,6 +17,7 @@ use crate::logger::{info, warn, LoggerConfig, *}; use crate::mmds::data_store::{self, Mmds}; use crate::persist::{CreateSnapshotError, RestoreFromSnapshotError, VmInfo}; use crate::resources::VmmConfig; +use crate::seccomp::BpfThreadMap; use crate::vmm_config::balloon::{ BalloonConfigError, BalloonDeviceConfig, BalloonStats, BalloonUpdateConfig, BalloonUpdateStatsConfig, @@ -852,12 +852,11 @@ impl RuntimeApiController { mod tests { use std::path::PathBuf; - use seccompiler::BpfThreadMap; - use super::*; use crate::builder::tests::default_vmm; use crate::devices::virtio::block::CacheType; use crate::mmds::data_store::MmdsVersion; + use crate::seccomp::BpfThreadMap; use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType}; use crate::HTTP_MAX_PAYLOAD_SIZE; diff --git a/src/vmm/src/seccomp.rs b/src/vmm/src/seccomp.rs new file mode 100644 index 00000000000..6948754f51a --- /dev/null +++ b/src/vmm/src/seccomp.rs @@ -0,0 +1,238 @@ +// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::collections::HashMap; +use std::io::Read; +use std::sync::Arc; + +use bincode::{DefaultOptions, Error as BincodeError, Options}; + +/// Each BPF instruction is 8 bytes long and 4 byte aligned. +/// This alignment needs to be satisfied in order for a BPF code to be accepted +/// by the syscalls. Using u64 here is is safe as it has same size and even bigger alignment. +pub type BpfInstruction = u64; + +/// Program made up of a sequence of BPF instructions. +pub type BpfProgram = Vec; + +/// Reference to program made up of a sequence of BPF instructions. +pub type BpfProgramRef<'a> = &'a [BpfInstruction]; + +/// Type that associates a thread category to a BPF program. +pub type BpfThreadMap = HashMap>; + +/// Retrieve empty seccomp filters. 
+pub fn get_empty_filters() -> BpfThreadMap { + let mut map = BpfThreadMap::new(); + map.insert("vmm".to_string(), Arc::new(vec![])); + map.insert("api".to_string(), Arc::new(vec![])); + map.insert("vcpu".to_string(), Arc::new(vec![])); + map +} + +/// Binary filter deserialization errors. +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum DeserializationError { + /// Bincode deserialization failed: {0} + Bincode(BincodeError), +} + +/// Deserialize binary with bpf filters +pub fn deserialize_binary( + reader: R, + bytes_limit: Option, +) -> Result { + let result = match bytes_limit { + Some(limit) => DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes() + .with_limit(limit) + .deserialize_from::>(reader), + // No limit is the default. + None => bincode::deserialize_from::>(reader), + } + .map_err(DeserializationError::Bincode)?; + + Ok(result + .into_iter() + .map(|(k, v)| (k.to_lowercase(), Arc::new(v))) + .collect()) +} + +/// Filter installation errors. +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum InstallationError { + /// Filter length exceeds the maximum size of {BPF_MAX_LEN:} instructions + FilterTooLarge, + /// prctl` syscall failed with error code: {0} + Prctl(i32), +} + +/// The maximum seccomp-BPF program length allowed by the linux kernel. +pub const BPF_MAX_LEN: usize = 4096; + +/// BPF structure definition for filter array. +/// See /usr/include/linux/filter.h . +#[repr(C)] +#[derive(Debug)] +struct SockFprog { + len: u16, + filter: *const BpfInstruction, +} + +/// Apply bpf filter. +pub fn apply_filter(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> { + // If the program is empty, don't install the filter. + if bpf_filter.is_empty() { + return Ok(()); + } + + // If the program length is greater than the limit allowed by the kernel, + // fail quickly. Otherwise, `prctl` will give a more cryptic error code. 
+ if BPF_MAX_LEN < bpf_filter.len() { + return Err(InstallationError::FilterTooLarge); + } + + let bpf_filter_len = + u16::try_from(bpf_filter.len()).map_err(|_| InstallationError::FilterTooLarge)?; + + // SAFETY: Safe because the parameters are valid. + unsafe { + { + let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if rc != 0 { + return Err(InstallationError::Prctl(*libc::__errno_location())); + } + } + + let bpf_prog = SockFprog { + len: bpf_filter_len, + filter: bpf_filter.as_ptr(), + }; + let bpf_prog_ptr = &bpf_prog as *const SockFprog; + { + let rc = libc::prctl( + libc::PR_SET_SECCOMP, + libc::SECCOMP_MODE_FILTER, + bpf_prog_ptr, + ); + if rc != 0 { + return Err(InstallationError::Prctl(*libc::__errno_location())); + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + #![allow(clippy::undocumented_unsafe_blocks)] + + use std::collections::HashMap; + use std::sync::Arc; + use std::thread; + + use super::*; + + #[test] + fn test_deserialize_binary() { + // Malformed bincode binary. + { + let data = "adassafvc".to_string(); + deserialize_binary(data.as_bytes(), None).unwrap_err(); + } + + // Test that the binary deserialization is correct, and that the thread keys + // have been lowercased. + { + let bpf_prog = vec![0; 2]; + let mut filter_map: HashMap = HashMap::new(); + filter_map.insert("VcpU".to_string(), bpf_prog.clone()); + let bytes = bincode::serialize(&filter_map).unwrap(); + + let mut expected_res = BpfThreadMap::new(); + expected_res.insert("vcpu".to_string(), Arc::new(bpf_prog)); + assert_eq!(deserialize_binary(&bytes[..], None).unwrap(), expected_res); + } + + // Test deserialization with binary_limit. + { + let bpf_prog = vec![0; 2]; + + let mut filter_map: HashMap = HashMap::new(); + filter_map.insert("t1".to_string(), bpf_prog.clone()); + + let bytes = bincode::serialize(&filter_map).unwrap(); + + // Binary limit too low. 
+ assert!(matches!( + deserialize_binary(&bytes[..], Some(20)).unwrap_err(), + DeserializationError::Bincode(error) + if error.to_string() == "the size limit has been reached" + )); + + let mut expected_res = BpfThreadMap::new(); + expected_res.insert("t1".to_string(), Arc::new(bpf_prog)); + + // Correct binary limit. + assert_eq!( + deserialize_binary(&bytes[..], Some(50)).unwrap(), + expected_res + ); + } + } + + #[test] + fn test_filter_apply() { + // Test filter too large. + thread::spawn(|| { + let filter: BpfProgram = vec![0; 5000]; + + // Apply seccomp filter. + assert_eq!( + apply_filter(&filter).unwrap_err(), + InstallationError::FilterTooLarge + ); + }) + .join() + .unwrap(); + + // Test empty filter. + thread::spawn(|| { + let filter: BpfProgram = vec![]; + + assert_eq!(filter.len(), 0); + + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + + apply_filter(&filter).unwrap(); + + // test that seccomp level remains 0 on failure. + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + }) + .join() + .unwrap(); + + // Test invalid BPF code. + thread::spawn(|| { + let filter = vec![0xFF; 1]; + + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + + assert_eq!( + apply_filter(&filter).unwrap_err(), + InstallationError::Prctl(22) + ); + + // test that seccomp level remains 0 on failure. + let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; + assert_eq!(seccomp_level, 0); + }) + .join() + .unwrap(); + } +} diff --git a/src/vmm/src/seccomp_filters.rs b/src/vmm/src/seccomp_filters.rs deleted file mode 100644 index aabdc1ef2c1..00000000000 --- a/src/vmm/src/seccomp_filters.rs +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 -use std::sync::Arc; - -use seccompiler::BpfThreadMap; - -/// Retrieve empty seccomp filters. -pub fn get_empty_filters() -> BpfThreadMap { - let mut map = BpfThreadMap::new(); - map.insert("vmm".to_string(), Arc::new(vec![])); - map.insert("api".to_string(), Arc::new(vec![])); - map.insert("vcpu".to_string(), Arc::new(vec![])); - map -} diff --git a/src/vmm/src/signal_handler.rs b/src/vmm/src/signal_handler.rs index 5bcfd41fd06..ac4befcb3d1 100644 --- a/src/vmm/src/signal_handler.rs +++ b/src/vmm/src/signal_handler.rs @@ -177,7 +177,6 @@ mod tests { use std::{process, thread}; use libc::syscall; - use seccompiler::sock_filter; use super::*; @@ -186,11 +185,6 @@ mod tests { let child = thread::spawn(move || { register_signal_handlers().unwrap(); - let filter = make_test_seccomp_bpf_filter(); - - seccompiler::apply_filter(&filter).unwrap(); - assert_eq!(METRICS.seccomp.num_faults.fetch(), 0); - // Call the forbidden `SYS_mkdirat`. unsafe { libc::syscall(libc::SYS_mkdirat, "/foo/bar\0") }; @@ -238,7 +232,6 @@ mod tests { }); child.join().unwrap(); - assert!(METRICS.seccomp.num_faults.fetch() >= 1); assert!(METRICS.signals.sigbus.fetch() >= 1); assert!(METRICS.signals.sigsegv.fetch() >= 1); assert!(METRICS.signals.sigxfsz.fetch() >= 1); @@ -247,141 +240,4 @@ mod tests { assert!(METRICS.signals.sighup.fetch() >= 1); assert!(METRICS.signals.sigill.fetch() >= 1); } - - fn make_test_seccomp_bpf_filter() -> Vec { - // Create seccomp filter that allows all syscalls, except for `SYS_mkdirat`. - // For some reason, directly calling `SYS_kill` with SIGSYS, like we do with the - // other signals, results in an error. Probably because of the way `cargo test` is - // handling signals. 
- #[cfg(target_arch = "aarch64")] - #[allow(clippy::unreadable_literal)] - let bpf_filter = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - sock_filter { - code: 21, - jt: 1, - jf: 0, - k: 3221225655, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 21, - jt: 0, - jf: 1, - k: 34, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 1, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 2, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 196608, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - ]; - #[cfg(target_arch = "x86_64")] - #[allow(clippy::unreadable_literal)] - let bpf_filter = vec![ - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 4, - }, - sock_filter { - code: 21, - jt: 1, - jf: 0, - k: 3221225534, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 32, - jt: 0, - jf: 0, - k: 0, - }, - sock_filter { - code: 21, - jt: 0, - jf: 1, - k: 258, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 1, - }, - sock_filter { - code: 5, - jt: 0, - jf: 0, - k: 2, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 196608, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - sock_filter { - code: 6, - jt: 0, - jf: 0, - k: 2147418112, - }, - ]; - - bpf_filter - } } diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index f90ba7fbd0c..1ba79a55231 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -10,7 +10,7 @@ use vmm_sys_util::tempdir::TempDir; use crate::builder::build_microvm_for_boot; use crate::resources::VmResources; -use crate::seccomp_filters::get_empty_filters; +use crate::seccomp::get_empty_filters; use crate::test_utils::mock_resources::{MockBootSourceConfig, MockVmConfig, MockVmResources}; use 
crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::instance_info::InstanceInfo; diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 73779fbd928..ddfeda21b4c 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -19,7 +19,6 @@ use kvm_ioctls::VcpuExit; use kvm_ioctls::VcpuFd; use libc::{c_int, c_void, siginfo_t}; use log::{error, info, warn}; -use seccompiler::{BpfProgram, BpfProgramRef}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; @@ -27,6 +26,7 @@ use crate::cpu_config::templates::{CpuConfiguration, GuestConfigError}; #[cfg(feature = "gdb")] use crate::gdb::target::{get_raw_tid, GdbTargetError}; use crate::logger::{IncMetric, METRICS}; +use crate::seccomp::{BpfProgram, BpfProgramRef}; use crate::utils::signal::{register_signal_handler, sigrtmin, Killable}; use crate::utils::sm::StateMachine; use crate::vstate::vm::Vm; @@ -288,7 +288,7 @@ impl Vcpu { // Load seccomp filters for this vCPU thread. // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. 
- if let Err(err) = seccompiler::apply_filter(seccomp_filter) { + if let Err(err) = crate::seccomp::apply_filter(seccomp_filter) { panic!( "Failed to set the requested seccomp filters on vCPU {}: Error: {}", self.kvm_vcpu.index, err @@ -773,7 +773,7 @@ pub(crate) mod tests { use crate::builder::StartMicrovmError; use crate::devices::bus::DummyDevice; use crate::devices::BusDevice; - use crate::seccomp_filters::get_empty_filters; + use crate::seccomp::get_empty_filters; use crate::utils::signal::validate_signal_num; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuError as EmulationError; diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 4312c6345db..40eab05c4a4 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ -12,7 +12,7 @@ use vmm::resources::VmResources; use vmm::rpc_interface::{ LoadSnapshotError, PrebootApiController, RuntimeApiController, VmmAction, VmmActionError, }; -use vmm::seccomp_filters::get_empty_filters; +use vmm::seccomp::get_empty_filters; use vmm::snapshot::Snapshot; #[cfg(target_arch = "x86_64")] use vmm::test_utils::dirty_tracking_vmm; From acee69f3d1d130bdd6531ed9782c15e7a86e919a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 16 Dec 2024 17:21:23 +0100 Subject: [PATCH 31/78] devtool: remove obsolete commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we depend on libseccomp in the previous commit, these commands to update the syscall table are no longer needed. 
Signed-off-by: Pablo Barbáchano --- tools/devtool | 137 -------------------------------------------------- 1 file changed, 137 deletions(-) diff --git a/tools/devtool b/tools/devtool index 2ab7969484b..29434bf8100 100755 --- a/tools/devtool +++ b/tools/devtool @@ -375,12 +375,6 @@ cmd_help() { echo " This should be used as the last step in every commit, to ensure that the" echo " Rust style tests pass." echo "" - echo " generate_syscall_tables " - echo " Generates the syscall tables for seccompiler, according to a given kernel version." - echo " Release candidate (rc) linux versions are not allowed." - echo " Outputs a rust file for each supported arch: src/seccompiler/src/syscall_table/{arch}.rs" - echo " Supported architectures: x86_64 and aarch64." - echo "" echo " install [-p|--path] [--debug|--release]" echo " Install firecracker, jailer and seccomp binaries to /usr/local/bin or a given path." echo " Only the musl linked binaries are supported." @@ -1029,137 +1023,6 @@ cmd_checkenv() { check_vulns } -generate_syscall_table_x86_64() { - path_to_rust_file="$FC_ROOT_DIR/src/seccompiler/src/syscall_table/x86_64.rs" - - echo "$header" > $path_to_rust_file - - # the table for x86_64 is nicely formatted here: linux/arch/x86/entry/syscalls/syscall_64.tbl - cat linux/arch/x86/entry/syscalls/syscall_64.tbl | grep -v "^#" | grep -v -e '^$' |\ - awk '{print $2,$3,$1}' | grep -v "^x32" |\ - awk '{print " map.insert(\""$2"\".to_string(), "$3");"}' | sort >> $path_to_rust_file - - echo "$footer" >> $path_to_rust_file - - say "Generated at: $path_to_rust_file" -} - -generate_syscall_table_aarch64() { - path_to_rust_file="$FC_ROOT_DIR/src/seccompiler/src/syscall_table/aarch64.rs" - - # filter for substituting `#define`s that point to other macros; - # values taken from linux/include/uapi/asm-generic/unistd.h - replace+='s/__NR3264_fadvise64/223/;' - replace+='s/__NR3264_fcntl/25/;' - replace+='s/__NR3264_fstatat/79/;' - replace+='s/__NR3264_fstatfs/44/;' - 
replace+='s/__NR3264_fstat/80/;' - replace+='s/__NR3264_ftruncate/46/;' - replace+='s/__NR3264_lseek/62/;' - replace+='s/__NR3264_sendfile/71/;' - replace+='s/__NR3264_statfs/43/;' - replace+='s/__NR3264_truncate/45/;' - replace+='s/__NR3264_mmap/222/;' - - echo "$header" > $path_to_rust_file - - # run the gcc command in the Docker container (to make sure that we have gcc installed) - # the aarch64 syscall table is not located in a .tbl file, like x86; we run gcc's - # pre-processor to extract the numeric constants from header files. - run_devctr \ - --user "$(id -u):$(id -g)" \ - --workdir "$CTR_KERNEL_DIR" \ - -- \ - gcc -Ilinux/include/uapi -E -dM -D__ARCH_WANT_RENAMEAT\ - -D__BITS_PER_LONG=64\ - linux/arch/arm64/include/uapi/asm/unistd.h |\ - grep "#define __NR_" | grep -v "__NR_syscalls" |\ - grep -v "__NR_arch_specific_syscall" |\ - awk -F '__NR_' '{print $2}' |\ - sed $replace |\ - awk '{ print " map.insert(\""$1"\".to_string(), "$2");" }' |\ - sort -d >> $path_to_rust_file - ret=$? - - [ $ret -ne 0 ] && return $ret - - echo "$footer" >> $path_to_rust_file - - say "Generated at: $path_to_rust_file" -} - -cmd_generate_syscall_tables() { - # Parse any command line args. - while [ $# -gt 0 ]; do - case "$1" in - "-h"|"--help") { cmd_help; exit 1; } ;; - *) { kernel_version="$1"; break; } ;; - esac - shift - done - - validate_kernel_version "$kernel_version" - - kernel_major=v$(echo ${kernel_version} | cut -d . -f 1).x - kernel_baseurl=https://www.kernel.org/pub/linux/kernel/${kernel_major} - kernel_archive=linux-${kernel_version}.tar.xz - - ensure_devctr - - # Create the kernel clone directory - rm -rf "$KERNEL_DIR" - create_dir "$KERNEL_DIR" - cd "$KERNEL_DIR" - - say "Fetching linux kernel..." - - # Get sha256 checksum. - curl -fsSLO ${kernel_baseurl}/sha256sums.asc && \ - kernel_sha256=$(grep ${kernel_archive} sha256sums.asc | cut -d ' ' -f 1) - # Get kernel archive. - curl -fsSLO "$kernel_baseurl/$kernel_archive" && \ - # Verify checksum. 
- echo "${kernel_sha256} ${kernel_archive}" | sha256sum -c - && \ - # Decompress the kernel source. - xz -d "${kernel_archive}" && \ - cat linux-${kernel_version}.tar | tar -x && mv linux-${kernel_version} linux - - ret=$? - [ $ret -ne 0 ] && return $ret - - # rust file header - read -r -d '' header << EOM -// Copyright $(date +"%Y") Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -// This file is auto-generated by \`tools/devtool generate_syscall_tables\`. -// Do NOT manually edit! -// Generated at: $(date) -// Kernel version: $kernel_version - -use std::collections::HashMap; - -pub(crate) fn make_syscall_table(map: &mut HashMap) { -EOM - - # rust file footer - read -r -d '' footer << EOM -} - -EOM - - # generate syscall table for x86_64 - say "Generating table for x86_64..." - generate_syscall_table_x86_64 $header $footer - - # generate syscall table for aarch64 - say "Generating table for aarch64..." - generate_syscall_table_aarch64 $header $footer - - ret=$? - [ $ret -ne 0 ] && return $ret -} - cmd_install() { # By default we install release/musl binaries. profile="release" From 66263bde82380bdd57166febdf324639b1b28576 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 29 Nov 2024 16:17:19 +0000 Subject: [PATCH 32/78] refactor(seccomp): replace deprecated `prctl` with `syscall` According to https://www.man7.org/linux/man-pages/man2/PR_SET_SECCOMP.2const.html using `prctl` for setting seccomp filter is deprecated, so switch to using `syscall` instead. 
Signed-off-by: Egor Lazarchuk --- src/vmm/src/seccomp.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/vmm/src/seccomp.rs b/src/vmm/src/seccomp.rs index 6948754f51a..f62845bc800 100644 --- a/src/vmm/src/seccomp.rs +++ b/src/vmm/src/seccomp.rs @@ -111,9 +111,10 @@ pub fn apply_filter(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> }; let bpf_prog_ptr = &bpf_prog as *const SockFprog; { - let rc = libc::prctl( - libc::PR_SET_SECCOMP, - libc::SECCOMP_MODE_FILTER, + let rc = libc::syscall( + libc::SYS_seccomp, + libc::SECCOMP_SET_MODE_FILTER, + 0, bpf_prog_ptr, ); if rc != 0 { From a00964eef5a8d1e1c169137a4cb31ee87ca6d862 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 11 Dec 2024 12:41:15 +0000 Subject: [PATCH 33/78] refactor: use std::io::Error when setting seccomp Replace __errno_location() with std::io::Error::last_os_error() as a more standard way of getting the errno value. Signed-off-by: Egor Lazarchuk --- src/vmm/src/seccomp.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/vmm/src/seccomp.rs b/src/vmm/src/seccomp.rs index f62845bc800..02fbf01b2dc 100644 --- a/src/vmm/src/seccomp.rs +++ b/src/vmm/src/seccomp.rs @@ -60,12 +60,12 @@ pub fn deserialize_binary( } /// Filter installation errors. -#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum InstallationError { /// Filter length exceeds the maximum size of {BPF_MAX_LEN:} instructions FilterTooLarge, /// prctl` syscall failed with error code: {0} - Prctl(i32), + Prctl(std::io::Error), } /// The maximum seccomp-BPF program length allowed by the linux kernel. 
@@ -101,7 +101,7 @@ pub fn apply_filter(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> { let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); if rc != 0 { - return Err(InstallationError::Prctl(*libc::__errno_location())); + return Err(InstallationError::Prctl(std::io::Error::last_os_error())); } } @@ -118,7 +118,7 @@ pub fn apply_filter(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> bpf_prog_ptr, ); if rc != 0 { - return Err(InstallationError::Prctl(*libc::__errno_location())); + return Err(InstallationError::Prctl(std::io::Error::last_os_error())); } } } @@ -191,10 +191,10 @@ mod tests { let filter: BpfProgram = vec![0; 5000]; // Apply seccomp filter. - assert_eq!( + assert!(matches!( apply_filter(&filter).unwrap_err(), InstallationError::FilterTooLarge - ); + )); }) .join() .unwrap(); @@ -224,10 +224,10 @@ mod tests { let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; assert_eq!(seccomp_level, 0); - assert_eq!( + assert!(matches!( apply_filter(&filter).unwrap_err(), - InstallationError::Prctl(22) - ); + InstallationError::Prctl(_) + )); // test that seccomp level remains 0 on failure. let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) }; From f680208cd5ae74aa07952fc18638879eac84313f Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 11 Dec 2024 13:54:46 +0000 Subject: [PATCH 34/78] refactor: replace DeserializationError with type alias The error enum had only 1 element and we can replace it with alias for simplicity. 
Signed-off-by: Egor Lazarchuk --- src/vmm/src/seccomp.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/vmm/src/seccomp.rs b/src/vmm/src/seccomp.rs index 02fbf01b2dc..3da974e6027 100644 --- a/src/vmm/src/seccomp.rs +++ b/src/vmm/src/seccomp.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use std::io::Read; use std::sync::Arc; -use bincode::{DefaultOptions, Error as BincodeError, Options}; +use bincode::{DefaultOptions, Options}; /// Each BPF instruction is 8 bytes long and 4 byte aligned. /// This alignment needs to be satisfied in order for a BPF code to be accepted @@ -21,6 +21,9 @@ pub type BpfProgramRef<'a> = &'a [BpfInstruction]; /// Type that associates a thread category to a BPF program. pub type BpfThreadMap = HashMap>; +/// Binary filter deserialization errors. +pub type DeserializationError = bincode::Error; + /// Retrieve empty seccomp filters. pub fn get_empty_filters() -> BpfThreadMap { let mut map = BpfThreadMap::new(); @@ -30,13 +33,6 @@ pub fn get_empty_filters() -> BpfThreadMap { map } -/// Binary filter deserialization errors. -#[derive(Debug, thiserror::Error, displaydoc::Display)] -pub enum DeserializationError { - /// Bincode deserialization failed: {0} - Bincode(BincodeError), -} - /// Deserialize binary with bpf filters pub fn deserialize_binary( reader: R, @@ -50,8 +46,7 @@ pub fn deserialize_binary( .deserialize_from::>(reader), // No limit is the default. None => bincode::deserialize_from::>(reader), - } - .map_err(DeserializationError::Bincode)?; + }?; Ok(result .into_iter() @@ -169,7 +164,7 @@ mod tests { // Binary limit too low. 
assert!(matches!( deserialize_binary(&bytes[..], Some(20)).unwrap_err(), - DeserializationError::Bincode(error) + error if error.to_string() == "the size limit has been reached" )); From bd42beb28c41d5a0b0d9a88963fdada4c1f1ccce Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 11 Dec 2024 14:57:58 +0000 Subject: [PATCH 35/78] chore: add libseccomp update to CHANGELOG Add a note about updating backend for seccompiler to libseccomp. Signed-off-by: Egor Lazarchuk --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e402b85ad43..c993dfe8bb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,9 @@ and this project adheres to unnecessary fields (`max_connections` and `max_pending_resets`) from the snapshot format, bumping the snapshot version to 5.0.0. Users need to regenerate snapshots. +- [#4926](https://github.com/firecracker-microvm/firecracker/pull/4926): Replace + underlying implementation for seccompiler from in house one in favor of + `libseccomp` which produces smaller and more optimized BPF code. 
### Deprecated From 104d5d0b1b6ccb07b4d6f41ac09e2bcaaa61e769 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Wed, 11 Dec 2024 16:04:26 +0000 Subject: [PATCH 36/78] chore: add additional path for libseccomp lib Kani on x86 for some reason cannot find libseccomp by default, so we add additional path to the build.rs Signed-off-by: Egor Lazarchuk --- src/seccompiler/build.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/seccompiler/build.rs b/src/seccompiler/build.rs index 69878f1f31b..d0d2a30e39e 100644 --- a/src/seccompiler/build.rs +++ b/src/seccompiler/build.rs @@ -2,5 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 fn main() { + println!("cargo::rustc-link-search=/usr/local/lib"); println!("cargo::rustc-link-lib=seccomp"); } From c18254467b695a71631d9896df13dcf3a9fd4db8 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Thu, 9 Jan 2025 17:04:25 +0000 Subject: [PATCH 37/78] chore: update NOTICE with info about libseccomp usage Add a note about libseccomp usage in Firecracker build process and in the seccomp-bin. Signed-off-by: Egor Lazarchuk --- NOTICE | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NOTICE b/NOTICE index ff113a0f1a4..f2daaa7be8d 100644 --- a/NOTICE +++ b/NOTICE @@ -5,3 +5,8 @@ SPDX-License-Identifier: Apache-2.0 Portions Copyright 2017 The Chromium OS Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the THIRD-PARTY file. + +The Firecracker release bundle includes libseccomp which is available +under the LGPLv2.1 license. This is used in the Firecracker build process +to produce cBPF bytecode that is shipped alongside Firecracker for use by +the Linux kernel. 
From 9e011ba19d0f1c4e0e646888f0b1ce3a41c689fe Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 10 Jan 2025 14:22:37 +0000 Subject: [PATCH 38/78] examples: Have UFFD handler kill Firecracker should it die If the UFFD handler exits abnormally for some reason, have it take down Firecracker as well by SIGKILL-ing it from a panic hook. For this, reintroduce the "get peer creds" logic. We have to use SIGKILL because Firecracker could be inside the handler for a KVM-originated page fault that is not marked as interruptible, in which case all signals but SIGKILL are ignored (happens for example during KVM_SET_MSRS when it triggers the initialization of a gfn_to_pfn_cache for the kvm-clock page, which uses GUP without FOLL_INTERRUPTIBLE). While we're at it, add a hint to the generic "process not found" error message to indicate that potentially Firecracker died, and that the cause of this could be the UFFD handler crashing (for example, in #4601 the cause of the mystery hang was the UFFD handler crashing, but we were stumped by what's going on for over half a year. Let's avoid that going forward). We can't enable this by default because it interferes with unit tests, and also the "malicious_handler", so expose a function on `Runtime` to enable it only in valid_handler and fault_all_handler. 
Signed-off-by: Patrick Roy --- .../examples/uffd/fault_all_handler.rs | 1 + src/firecracker/examples/uffd/uffd_utils.rs | 37 +++++++++++++++++++ .../examples/uffd/valid_handler.rs | 1 + tests/framework/microvm.py | 3 ++ 4 files changed, 42 insertions(+) diff --git a/src/firecracker/examples/uffd/fault_all_handler.rs b/src/firecracker/examples/uffd/fault_all_handler.rs index 31ce68a97bc..5e9f49a3207 100644 --- a/src/firecracker/examples/uffd/fault_all_handler.rs +++ b/src/firecracker/examples/uffd/fault_all_handler.rs @@ -24,6 +24,7 @@ fn main() { let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); let mut runtime = Runtime::new(stream, file); + runtime.install_panic_hook(); runtime.run(|uffd_handler: &mut UffdHandler| { // Read an event from the userfaultfd. let event = uffd_handler diff --git a/src/firecracker/examples/uffd/uffd_utils.rs b/src/firecracker/examples/uffd/uffd_utils.rs index 52d33765bd8..37aa63c62a3 100644 --- a/src/firecracker/examples/uffd/uffd_utils.rs +++ b/src/firecracker/examples/uffd/uffd_utils.rs @@ -208,6 +208,43 @@ impl Runtime { } } + fn peer_process_credentials(&self) -> libc::ucred { + let mut creds: libc::ucred = libc::ucred { + pid: 0, + gid: 0, + uid: 0, + }; + let mut creds_size = size_of::() as u32; + let ret = unsafe { + libc::getsockopt( + self.stream.as_raw_fd(), + libc::SOL_SOCKET, + libc::SO_PEERCRED, + &mut creds as *mut _ as *mut _, + &mut creds_size as *mut libc::socklen_t, + ) + }; + if ret != 0 { + panic!("Failed to get peer process credentials"); + } + creds + } + + pub fn install_panic_hook(&self) { + let peer_creds = self.peer_process_credentials(); + + let default_panic_hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |panic_info| { + let r = unsafe { libc::kill(peer_creds.pid, libc::SIGKILL) }; + + if r != 0 { + eprintln!("Failed to kill Firecracker process from panic hook"); + } + + default_panic_hook(panic_info); + })); + } + /// Polls the `UnixStream` and UFFD fds in a 
loop. /// When stream is polled, new uffd is retrieved. /// When uffd is polled, page fault is handled by diff --git a/src/firecracker/examples/uffd/valid_handler.rs b/src/firecracker/examples/uffd/valid_handler.rs index cfc5faf432c..6c681d932ac 100644 --- a/src/firecracker/examples/uffd/valid_handler.rs +++ b/src/firecracker/examples/uffd/valid_handler.rs @@ -24,6 +24,7 @@ fn main() { let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); let mut runtime = Runtime::new(stream, file); + runtime.install_panic_hook(); runtime.run(|uffd_handler: &mut UffdHandler| { // Read an event from the userfaultfd. let event = uffd_handler diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 278cb9ecd60..f93a0dabf19 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -310,6 +310,9 @@ def kill(self): if self.screen_pid: os.kill(self.screen_pid, signal.SIGKILL) except: + LOG.error( + "Failed to kill Firecracker Process. Did it already die (or did the UFFD handler process die and take it down)?" + ) LOG.error(self.log_data) raise From 3afaf949ed220570cafe0ba84ce7580fe469884b Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 10 Jan 2025 12:55:09 +0000 Subject: [PATCH 39/78] Drop `Serialize` implementation on `MachineConfigUpdate` We never make use of this, and I do not see where this ever _could_ be useful. Signed-off-by: Patrick Roy --- src/vmm/src/vmm_config/machine_config.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs index 8eee91c88be..58091af2b54 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -129,30 +129,28 @@ impl Default for MachineConfig { /// All fields are optional, but at least one needs to be specified. /// If a field is `Some(value)` then we assume an update is requested /// for that field. 
-#[derive(Clone, Default, Debug, PartialEq, Eq, Deserialize, Serialize)] +#[derive(Clone, Default, Debug, PartialEq, Eq, Deserialize)] #[serde(deny_unknown_fields)] pub struct MachineConfigUpdate { /// Number of vcpu to start. - #[serde(default, skip_serializing_if = "Option::is_none")] + #[serde(default)] pub vcpu_count: Option, /// The memory size in MiB. - #[serde(skip_serializing_if = "Option::is_none")] pub mem_size_mib: Option, /// Enables or disabled SMT. - #[serde(default, skip_serializing_if = "Option::is_none")] + #[serde(default)] pub smt: Option, /// A CPU template that it is used to filter the CPU features exposed to the guest. - #[serde(default, skip_serializing_if = "Option::is_none")] + #[serde(default)] pub cpu_template: Option, /// Enables or disables dirty page tracking. Enabling allows incremental snapshots. - #[serde(skip_serializing_if = "Option::is_none")] pub track_dirty_pages: Option, /// Configures what page size Firecracker should use to back guest memory. - #[serde(default, skip_serializing_if = "Option::is_none")] + #[serde(default)] pub huge_pages: Option, /// GDB socket address. #[cfg(feature = "gdb")] - #[serde(default, skip_serializing_if = "Option::is_none")] + #[serde(default)] pub gdb_socket_path: Option, } From 9cf1e6deeacfe2e6f0b85a501213cbbdb4434698 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 10 Jan 2025 12:59:11 +0000 Subject: [PATCH 40/78] fix: make `mem_size_mib` and `track_dirty_pages` optional This makes these fields optional in PATCH /machine-config requests. The comment on this structure says that all fields should be optional, and I dont quite see why these two should be different. Thus, add `serde(default)` to avoid forcing customers to explicitly set them to `null` if they do not want to update these parts of the machine config. 
Signed-off-by: Patrick Roy --- CHANGELOG.md | 4 ++++ src/vmm/src/vmm_config/machine_config.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c993dfe8bb2..138ccd20079 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,10 @@ and this project adheres to - [#4916](https://github.com/firecracker-microvm/firecracker/pull/4916): Fixed `IovDeque` implementation to work with any host page size. This fixes virtio-net device on non 4K host kernels. +- [#4991](https://github.com/firecracker-microvm/firecracker/pull/4991): Fixed + `mem_size_mib` and `track_dirty_pages` being mandatory for all + `PATCH /machine-config` requests. Now, they can be omitted which leaves these + parts of the machine configuration unchanged. ## [1.10.1] diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs index 58091af2b54..482fcfdd08f 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -136,6 +136,7 @@ pub struct MachineConfigUpdate { #[serde(default)] pub vcpu_count: Option, /// The memory size in MiB. + #[serde(default)] pub mem_size_mib: Option, /// Enables or disabled SMT. #[serde(default)] @@ -144,6 +145,7 @@ pub struct MachineConfigUpdate { #[serde(default)] pub cpu_template: Option, /// Enables or disables dirty page tracking. Enabling allows incremental snapshots. + #[serde(default)] pub track_dirty_pages: Option, /// Configures what page size Firecracker should use to back guest memory. #[serde(default)] From 3793b9949774cf1d11abb2ef664664b8f40958a0 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 10 Jan 2025 13:20:30 +0000 Subject: [PATCH 41/78] refactor: Get rid of `VmConfig` This struct is almost a 1:1 duplication of `struct MachineConfig`, with only a single deviation when it comes to CPU template handling (see below). 
This makes it very annoying to add new fields to the /machine-config endpoint, because counter-intuitively we have to hand-edit at least 3 structs to add new fields to. We can get rid of this duplication by merging VmConfig and MachineConfig into just `MachineConfig` (that's what the endpoint is called, so having the struct be the same makes sense). We now need to handle a bit of nasty-ness when it comes to CPU templates, because /machine-config can only be used for specifying static cpu templates, while a VmConfig is used to hold whatever CPU template is stored, in whatever way. However, we can handle this at the serde layer, by making serialize/deserialize ignore the field if it doesnt contain a static template. This is ugly, but since static templates are deprecated, we have line of sight to getting rid of this weirdness when we release 2.0. While we're at it, opportunistically rename functions etc to uniformly call this thing a "machine config" instead of a "vm config". Signed-off-by: Patrick Roy --- src/cpu-template-helper/src/main.rs | 2 +- .../src/api_server/parsed_request.rs | 4 +- .../request/machine_configuration.rs | 16 +- src/vmm/benches/memory_access.rs | 6 +- src/vmm/src/builder.rs | 29 +-- src/vmm/src/persist.rs | 18 +- src/vmm/src/resources.rs | 105 ++++++----- src/vmm/src/rpc_interface.rs | 33 ++-- src/vmm/src/test_utils/mock_resources/mod.rs | 4 +- src/vmm/src/vmm_config/machine_config.rs | 176 ++++++++++++------ src/vmm/tests/integration_tests.rs | 7 +- 11 files changed, 234 insertions(+), 166 deletions(-) diff --git a/src/cpu-template-helper/src/main.rs b/src/cpu-template-helper/src/main.rs index 84a127e353a..35b7ea22d82 100644 --- a/src/cpu-template-helper/src/main.rs +++ b/src/cpu-template-helper/src/main.rs @@ -161,7 +161,7 @@ fn run(cli: Cli) -> Result<(), HelperError> { let (vmm, vm_resources) = utils::build_microvm_from_config(config, template)?; let cpu_template = vm_resources - .vm_config + .machine_config .cpu_template 
.get_cpu_template()? .into_owned(); diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index 10405c156ec..41d625e9abe 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -163,8 +163,8 @@ impl ParsedRequest { info!("The request was executed successfully. Status code: 204 No Content."); Response::new(Version::Http11, StatusCode::NoContent) } - VmmData::MachineConfiguration(vm_config) => { - Self::success_response_with_data(vm_config) + VmmData::MachineConfiguration(machine_config) => { + Self::success_response_with_data(machine_config) } VmmData::MmdsValue(value) => Self::success_response_with_mmds_value(value), VmmData::BalloonConfig(balloon_config) => { diff --git a/src/firecracker/src/api_server/request/machine_configuration.rs b/src/firecracker/src/api_server/request/machine_configuration.rs index 871bbda5ecc..2409aa06cac 100644 --- a/src/firecracker/src/api_server/request/machine_configuration.rs +++ b/src/firecracker/src/api_server/request/machine_configuration.rs @@ -31,7 +31,8 @@ pub(crate) fn parse_put_machine_config(body: &Body) -> Result Result Result pub fn configure_system_for_boot( vmm: &mut Vmm, vcpus: &mut [Vcpu], - vm_config: &VmConfig, + machine_config: &MachineConfig, cpu_template: &CustomCpuTemplate, entry_addr: GuestAddress, initrd: &Option, @@ -793,8 +796,8 @@ pub fn configure_system_for_boot( let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template)?; let vcpu_config = VcpuConfig { - vcpu_count: vm_config.vcpu_count, - smt: vm_config.smt, + vcpu_count: machine_config.vcpu_count, + smt: machine_config.smt, cpu_config, }; diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 1feef41ec30..621d95d1e87 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -32,7 +32,7 @@ use crate::snapshot::Snapshot; use crate::utils::u64_to_usize; use 
crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::instance_info::InstanceInfo; -use crate::vmm_config::machine_config::{HugePageConfig, MachineConfigUpdate, VmConfigError}; +use crate::vmm_config::machine_config::{HugePageConfig, MachineConfigError, MachineConfigUpdate}; use crate::vmm_config::snapshot::{ CreateSnapshotParams, LoadSnapshotParams, MemBackendType, SnapshotType, }; @@ -61,11 +61,11 @@ pub struct VmInfo { impl From<&VmResources> for VmInfo { fn from(value: &VmResources) -> Self { Self { - mem_size_mib: value.vm_config.mem_size_mib as u64, - smt: value.vm_config.smt, - cpu_template: StaticCpuTemplate::from(&value.vm_config.cpu_template), + mem_size_mib: value.machine_config.mem_size_mib as u64, + smt: value.machine_config.smt, + cpu_template: StaticCpuTemplate::from(&value.machine_config.cpu_template), boot_source: value.boot_source.config.clone(), - huge_pages: value.vm_config.huge_pages, + huge_pages: value.machine_config.huge_pages, } } } @@ -422,11 +422,11 @@ pub fn restore_from_snapshot( .vcpu_states .len() .try_into() - .map_err(|_| VmConfigError::InvalidVcpuCount) + .map_err(|_| MachineConfigError::InvalidVcpuCount) .map_err(BuildMicrovmFromSnapshotError::VmUpdateConfig)?; vm_resources - .update_vm_config(&MachineConfigUpdate { + .update_machine_config(&MachineConfigUpdate { vcpu_count: Some(vcpu_count), mem_size_mib: Some(u64_to_usize(microvm_state.vm_info.mem_size_mib)), smt: Some(microvm_state.vm_info.smt), @@ -450,7 +450,7 @@ pub fn restore_from_snapshot( mem_backend_path, mem_state, track_dirty_pages, - vm_resources.vm_config.huge_pages, + vm_resources.machine_config.huge_pages, ) .map_err(RestoreFromSnapshotGuestMemoryError::File)?, None, @@ -462,7 +462,7 @@ pub fn restore_from_snapshot( // We enable the UFFD_FEATURE_EVENT_REMOVE feature only if a balloon device // is present in the microVM state. 
microvm_state.device_states.balloon_device.is_some(), - vm_resources.vm_config.huge_pages, + vm_resources.machine_config.huge_pages, ) .map_err(RestoreFromSnapshotGuestMemoryError::Uffd)?, }; diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 0cde08a844d..d0c80789681 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -22,7 +22,7 @@ use crate::vmm_config::drive::*; use crate::vmm_config::entropy::*; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::{ - HugePageConfig, MachineConfig, MachineConfigUpdate, VmConfig, VmConfigError, + HugePageConfig, MachineConfig, MachineConfigError, MachineConfigUpdate, }; use crate::vmm_config::metrics::{init_metrics, MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; @@ -54,7 +54,7 @@ pub enum ResourcesError { /// Network device error: {0} NetDevice(#[from] NetworkInterfaceError), /// VM config error: {0} - VmConfig(#[from] VmConfigError), + MachineConfig(#[from] MachineConfigError), /// Vsock device error: {0} VsockDevice(#[from] VsockConfigError), /// Entropy device error: {0} @@ -93,7 +93,7 @@ pub struct VmmConfig { #[derive(Debug, Default)] pub struct VmResources { /// The vCpu and memory configuration for this microVM. - pub vm_config: VmConfig, + pub machine_config: MachineConfig, /// The boot source spec (contains both config and builder) for this microVM. pub boot_source: BootSource, /// The block devices. 
@@ -140,7 +140,7 @@ impl VmResources { }; if let Some(machine_config) = vmm_config.machine_config { let machine_config = MachineConfigUpdate::from(machine_config); - resources.update_vm_config(&machine_config)?; + resources.update_machine_config(&machine_config)?; } if let Some(cpu_config) = vmm_config.cpu_config { @@ -219,7 +219,7 @@ impl VmResources { SharedDeviceType::Balloon(balloon) => { self.balloon.set_device(balloon); - if self.vm_config.huge_pages != HugePageConfig::None { + if self.machine_config.huge_pages != HugePageConfig::None { return Err(ResourcesError::BalloonDevice(BalloonConfigError::HugePages)); } } @@ -238,16 +238,19 @@ impl VmResources { /// Add a custom CPU template to the VM resources /// to configure vCPUs. pub fn set_custom_cpu_template(&mut self, cpu_template: CustomCpuTemplate) { - self.vm_config.set_custom_cpu_template(cpu_template); + self.machine_config.set_custom_cpu_template(cpu_template); } /// Updates the configuration of the microVM. - pub fn update_vm_config(&mut self, update: &MachineConfigUpdate) -> Result<(), VmConfigError> { + pub fn update_machine_config( + &mut self, + update: &MachineConfigUpdate, + ) -> Result<(), MachineConfigError> { if update.huge_pages.is_some() && update.huge_pages != Some(HugePageConfig::None) { log_dev_preview_warning("Huge pages support", None); } - let updated = self.vm_config.update(update)?; + let updated = self.machine_config.update(update)?; // The VM cannot have a memory size smaller than the target size // of the balloon device, if present. @@ -256,23 +259,23 @@ impl VmResources { < self .balloon .get_config() - .map_err(|_| VmConfigError::InvalidVmState)? + .map_err(|_| MachineConfigError::InvalidVmState)? 
.amount_mib as usize { - return Err(VmConfigError::IncompatibleBalloonSize); + return Err(MachineConfigError::IncompatibleBalloonSize); } if self.balloon.get().is_some() && updated.huge_pages != HugePageConfig::None { - return Err(VmConfigError::BalloonAndHugePages); + return Err(MachineConfigError::BalloonAndHugePages); } if self.boot_source.config.initrd_path.is_some() && updated.huge_pages != HugePageConfig::None { - return Err(VmConfigError::InitrdAndHugePages); + return Err(MachineConfigError::InitrdAndHugePages); } - self.vm_config = updated; + self.machine_config = updated; Ok(()) } @@ -322,11 +325,11 @@ impl VmResources { ) -> Result<(), BalloonConfigError> { // The balloon cannot have a target size greater than the size of // the guest memory. - if config.amount_mib as usize > self.vm_config.mem_size_mib { + if config.amount_mib as usize > self.machine_config.mem_size_mib { return Err(BalloonConfigError::TooManyPagesRequested); } - if self.vm_config.huge_pages != HugePageConfig::None { + if self.machine_config.huge_pages != HugePageConfig::None { return Err(BalloonConfigError::HugePages); } @@ -339,7 +342,7 @@ impl VmResources { boot_source_cfg: BootSourceConfig, ) -> Result<(), BootSourceConfigError> { if boot_source_cfg.initrd_path.is_some() - && self.vm_config.huge_pages != HugePageConfig::None + && self.machine_config.huge_pages != HugePageConfig::None { return Err(BootSourceConfigError::HugePagesAndInitRd); } @@ -480,16 +483,16 @@ impl VmResources { // that would not be worth the effort. 
if vhost_user_device_used { GuestMemoryMmap::memfd_backed( - self.vm_config.mem_size_mib, - self.vm_config.track_dirty_pages, - self.vm_config.huge_pages, + self.machine_config.mem_size_mib, + self.machine_config.track_dirty_pages, + self.machine_config.huge_pages, ) } else { - let regions = crate::arch::arch_memory_regions(self.vm_config.mem_size_mib << 20); + let regions = crate::arch::arch_memory_regions(self.machine_config.mem_size_mib << 20); GuestMemoryMmap::from_raw_regions( ®ions, - self.vm_config.track_dirty_pages, - self.vm_config.huge_pages, + self.machine_config.track_dirty_pages, + self.machine_config.huge_pages, ) } } @@ -503,7 +506,7 @@ impl From<&VmResources> for VmmConfig { boot_source: resources.boot_source.config.clone(), cpu_config: None, logger: None, - machine_config: Some(MachineConfig::from(&resources.vm_config)), + machine_config: Some(resources.machine_config.clone()), metrics: None, mmds_config: resources.mmds_config(), net_devices: resources.net_builder.configs(), @@ -535,7 +538,7 @@ mod tests { BootConfig, BootSource, BootSourceConfig, DEFAULT_KERNEL_CMDLINE, }; use crate::vmm_config::drive::{BlockBuilder, BlockDeviceConfig}; - use crate::vmm_config::machine_config::{HugePageConfig, MachineConfig, VmConfigError}; + use crate::vmm_config::machine_config::{HugePageConfig, MachineConfig, MachineConfigError}; use crate::vmm_config::net::{NetBuilder, NetworkInterfaceConfig}; use crate::vmm_config::vsock::tests::default_config; use crate::vmm_config::RateLimiterConfig; @@ -608,7 +611,7 @@ mod tests { fn default_vm_resources() -> VmResources { VmResources { - vm_config: VmConfig::default(), + machine_config: MachineConfig::default(), boot_source: default_boot_cfg(), block: default_blocks(), vsock: Default::default(), @@ -821,7 +824,7 @@ mod tests { assert!( matches!( error, - ResourcesError::VmConfig(VmConfigError::InvalidMemorySize) + ResourcesError::MachineConfig(MachineConfigError::InvalidMemorySize) ), "{:?}", error @@ -1135,7 +1138,7 @@ 
mod tests { ) .unwrap(); assert_eq!( - vm_resources.vm_config.cpu_template, + vm_resources.machine_config.cpu_template, Some(CpuTemplateType::Custom(CustomCpuTemplate::default())) ); } @@ -1339,7 +1342,7 @@ mod tests { } #[test] - fn test_update_vm_config() { + fn test_update_machine_config() { let mut vm_resources = default_vm_resources(); let mut aux_vm_config = MachineConfigUpdate { vcpu_count: Some(32), @@ -1354,25 +1357,25 @@ mod tests { }; assert_ne!( - MachineConfigUpdate::from(MachineConfig::from(&vm_resources.vm_config)), + MachineConfigUpdate::from(vm_resources.machine_config.clone()), aux_vm_config ); - vm_resources.update_vm_config(&aux_vm_config).unwrap(); + vm_resources.update_machine_config(&aux_vm_config).unwrap(); assert_eq!( - MachineConfigUpdate::from(MachineConfig::from(&vm_resources.vm_config)), + MachineConfigUpdate::from(vm_resources.machine_config.clone()), aux_vm_config ); // Invalid vcpu count. aux_vm_config.vcpu_count = Some(0); assert_eq!( - vm_resources.update_vm_config(&aux_vm_config), - Err(VmConfigError::InvalidVcpuCount) + vm_resources.update_machine_config(&aux_vm_config), + Err(MachineConfigError::InvalidVcpuCount) ); aux_vm_config.vcpu_count = Some(33); assert_eq!( - vm_resources.update_vm_config(&aux_vm_config), - Err(VmConfigError::InvalidVcpuCount) + vm_resources.update_machine_config(&aux_vm_config), + Err(MachineConfigError::InvalidVcpuCount) ); // Check that SMT is not supported on aarch64, and that on x86_64 enabling it requires vcpu @@ -1380,29 +1383,29 @@ mod tests { aux_vm_config.smt = Some(true); #[cfg(target_arch = "aarch64")] assert_eq!( - vm_resources.update_vm_config(&aux_vm_config), - Err(VmConfigError::SmtNotSupported) + vm_resources.update_machine_config(&aux_vm_config), + Err(MachineConfigError::SmtNotSupported) ); aux_vm_config.vcpu_count = Some(3); #[cfg(target_arch = "x86_64")] assert_eq!( - vm_resources.update_vm_config(&aux_vm_config), - Err(VmConfigError::InvalidVcpuCount) + 
vm_resources.update_machine_config(&aux_vm_config), + Err(MachineConfigError::InvalidVcpuCount) ); aux_vm_config.vcpu_count = Some(32); #[cfg(target_arch = "x86_64")] - vm_resources.update_vm_config(&aux_vm_config).unwrap(); + vm_resources.update_machine_config(&aux_vm_config).unwrap(); aux_vm_config.smt = Some(false); // Invalid mem_size_mib. aux_vm_config.mem_size_mib = Some(0); assert_eq!( - vm_resources.update_vm_config(&aux_vm_config), - Err(VmConfigError::InvalidMemorySize) + vm_resources.update_machine_config(&aux_vm_config), + Err(MachineConfigError::InvalidMemorySize) ); // Incompatible mem_size_mib with balloon size. - vm_resources.vm_config.mem_size_mib = 128; + vm_resources.machine_config.mem_size_mib = 128; vm_resources .set_balloon_device(BalloonDeviceConfig { amount_mib: 100, @@ -1412,20 +1415,22 @@ mod tests { .unwrap(); aux_vm_config.mem_size_mib = Some(90); assert_eq!( - vm_resources.update_vm_config(&aux_vm_config), - Err(VmConfigError::IncompatibleBalloonSize) + vm_resources.update_machine_config(&aux_vm_config), + Err(MachineConfigError::IncompatibleBalloonSize) ); // mem_size_mib compatible with balloon size. aux_vm_config.mem_size_mib = Some(256); - vm_resources.update_vm_config(&aux_vm_config).unwrap(); + vm_resources.update_machine_config(&aux_vm_config).unwrap(); // mem_size_mib incompatible with huge pages configuration aux_vm_config.mem_size_mib = Some(129); aux_vm_config.huge_pages = Some(HugePageConfig::Hugetlbfs2M); assert_eq!( - vm_resources.update_vm_config(&aux_vm_config).unwrap_err(), - VmConfigError::InvalidMemorySize + vm_resources + .update_machine_config(&aux_vm_config) + .unwrap_err(), + MachineConfigError::InvalidMemorySize ); // mem_size_mib compatible with huge page configuration @@ -1433,7 +1438,7 @@ mod tests { // Remove the balloon device config that's added by `default_vm_resources` as it would // trigger the "ballooning incompatible with huge pages" check. 
vm_resources.balloon = BalloonBuilder::new(); - vm_resources.update_vm_config(&aux_vm_config).unwrap(); + vm_resources.update_machine_config(&aux_vm_config).unwrap(); } #[test] @@ -1474,7 +1479,7 @@ mod tests { let mut vm_resources = default_vm_resources(); vm_resources.balloon = BalloonBuilder::new(); vm_resources - .update_vm_config(&MachineConfigUpdate { + .update_machine_config(&MachineConfigUpdate { huge_pages: Some(HugePageConfig::Hugetlbfs2M), ..Default::default() }) diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 60a046f7e89..82993fcafea 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -26,7 +26,7 @@ use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError}; use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError}; use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError}; use crate::vmm_config::instance_info::InstanceInfo; -use crate::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate, VmConfigError}; +use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ @@ -120,7 +120,7 @@ pub enum VmmAction { UpdateNetworkInterface(NetworkInterfaceUpdateConfig), /// Update the microVM configuration (memory & vcpu) using `VmUpdateConfig` as input. This /// action can only be called before the microVM has booted. - UpdateVmConfiguration(MachineConfigUpdate), + UpdateMachineConfiguration(MachineConfigUpdate), } /// Wrapper for all errors associated with VMM actions. 
@@ -145,7 +145,7 @@ pub enum VmmActionError { /// Logger error: {0} Logger(#[from] crate::logger::LoggerUpdateError), /// Machine config error: {0} - MachineConfig(#[from] VmConfigError), + MachineConfig(#[from] MachineConfigError), /// Metrics error: {0} Metrics(#[from] MetricsConfigError), #[from(ignore)] @@ -415,9 +415,9 @@ impl<'a> PrebootApiController<'a> { Ok(VmmData::FullVmConfig((&*self.vm_resources).into())) } GetMMDS => self.get_mmds(), - GetVmMachineConfig => Ok(VmmData::MachineConfiguration(MachineConfig::from( - &self.vm_resources.vm_config, - ))), + GetVmMachineConfig => Ok(VmmData::MachineConfiguration( + self.vm_resources.machine_config.clone(), + )), GetVmInstanceInfo => Ok(VmmData::InstanceInformation(self.instance_info.clone())), GetVmmVersion => Ok(VmmData::VmmVersion(self.instance_info.vmm_version.clone())), InsertBlockDevice(config) => self.insert_block_device(config), @@ -434,7 +434,7 @@ impl<'a> PrebootApiController<'a> { SetVsockDevice(config) => self.set_vsock_device(config), SetMmdsConfiguration(config) => self.set_mmds_config(config), StartMicroVm => self.start_microvm(), - UpdateVmConfiguration(config) => self.update_vm_config(config), + UpdateMachineConfiguration(config) => self.update_machine_config(config), SetEntropyDevice(config) => self.set_entropy_device(config), // Operations not allowed pre-boot. 
CreateSnapshot(_) @@ -502,10 +502,13 @@ impl<'a> PrebootApiController<'a> { .map_err(VmmActionError::MmdsConfig) } - fn update_vm_config(&mut self, cfg: MachineConfigUpdate) -> Result { + fn update_machine_config( + &mut self, + cfg: MachineConfigUpdate, + ) -> Result { self.boot_path = true; self.vm_resources - .update_vm_config(&cfg) + .update_machine_config(&cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::MachineConfig) } @@ -641,9 +644,9 @@ impl RuntimeApiController { .map_err(|err| VmmActionError::BalloonConfig(BalloonConfigError::from(err))), GetFullVmConfig => Ok(VmmData::FullVmConfig((&self.vm_resources).into())), GetMMDS => self.get_mmds(), - GetVmMachineConfig => Ok(VmmData::MachineConfiguration(MachineConfig::from( - &self.vm_resources.vm_config, - ))), + GetVmMachineConfig => Ok(VmmData::MachineConfiguration( + self.vm_resources.machine_config.clone(), + )), GetVmInstanceInfo => Ok(VmmData::InstanceInformation( self.vmm.lock().expect("Poisoned lock").instance_info(), )), @@ -686,7 +689,7 @@ impl RuntimeApiController { | SetMmdsConfiguration(_) | SetEntropyDevice(_) | StartMicroVm - | UpdateVmConfiguration(_) => Err(VmmActionError::OperationNotSupportedPostBoot), + | UpdateMachineConfiguration(_) => Err(VmmActionError::OperationNotSupportedPostBoot), } } @@ -753,7 +756,7 @@ impl RuntimeApiController { log_dev_preview_warning("Virtual machine snapshots", None); if create_params.snapshot_type == SnapshotType::Diff - && !self.vm_resources.vm_config.track_dirty_pages + && !self.vm_resources.machine_config.track_dirty_pages { return Err(VmmActionError::NotSupported( "Diff snapshots are not allowed on uVMs with dirty page tracking disabled." 
@@ -1254,7 +1257,7 @@ mod tests { network_interfaces: Vec::new(), }, ))); - check_unsupported(runtime_request(VmmAction::UpdateVmConfiguration( + check_unsupported(runtime_request(VmmAction::UpdateMachineConfiguration( MachineConfigUpdate::from(MachineConfig::default()), ))); check_unsupported(runtime_request(VmmAction::LoadSnapshot( diff --git a/src/vmm/src/test_utils/mock_resources/mod.rs b/src/vmm/src/test_utils/mock_resources/mod.rs index 9f4406ab280..f8485bf9678 100644 --- a/src/vmm/src/test_utils/mock_resources/mod.rs +++ b/src/vmm/src/test_utils/mock_resources/mod.rs @@ -81,12 +81,12 @@ impl MockVmResources { pub fn with_vm_config(mut self, vm_config: MachineConfig) -> Self { let machine_config = MachineConfigUpdate::from(vm_config); - self.0.update_vm_config(&machine_config).unwrap(); + self.0.update_machine_config(&machine_config).unwrap(); self } pub fn set_cpu_template(&mut self, cpu_template: CustomCpuTemplate) { - self.0.vm_config.set_custom_cpu_template(cpu_template); + self.0.machine_config.set_custom_cpu_template(cpu_template); } } diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs index 482fcfdd08f..092179f5ff5 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::cpu_config::templates::{CpuTemplateType, CustomCpuTemplate, StaticCpuTemplate}; @@ -15,7 +15,7 @@ pub const MAX_SUPPORTED_VCPUS: u8 = 32; /// Errors associated with configuring the microVM. #[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] -pub enum VmConfigError { +pub enum MachineConfigError { /// The memory size (MiB) is smaller than the previously set balloon device target size. 
IncompatibleBalloonSize, /// The memory size (MiB) is either 0, or not a multiple of the configured page size. @@ -103,8 +103,14 @@ pub struct MachineConfig { #[serde(default)] pub smt: bool, /// A CPU template that it is used to filter the CPU features exposed to the guest. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub cpu_template: Option, + // FIXME: once support for static CPU templates is removed, this field can be dropped altogether + #[serde( + default, + skip_serializing_if = "is_none_or_custom_template", + deserialize_with = "deserialize_static_template", + serialize_with = "serialize_static_template" + )] + pub cpu_template: Option, /// Enables or disables dirty page tracking. Enabling allows incremental snapshots. #[serde(default)] pub track_dirty_pages: bool, @@ -117,14 +123,50 @@ pub struct MachineConfig { pub gdb_socket_path: Option, } +fn is_none_or_custom_template(template: &Option) -> bool { + matches!(template, None | Some(CpuTemplateType::Custom(_))) +} + +fn deserialize_static_template<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + Option::::deserialize(deserializer) + .map(|maybe_template| maybe_template.map(CpuTemplateType::Static)) +} + +fn serialize_static_template( + template: &Option, + serializer: S, +) -> Result +where + S: Serializer, +{ + let Some(CpuTemplateType::Static(template)) = template else { + // We have a skip_serializing_if on the field + unreachable!() + }; + + template.serialize(serializer) +} + impl Default for MachineConfig { fn default() -> Self { - Self::from(&VmConfig::default()) + Self { + vcpu_count: 1, + mem_size_mib: DEFAULT_MEM_SIZE_MIB, + smt: false, + cpu_template: None, + track_dirty_pages: false, + huge_pages: HugePageConfig::None, + #[cfg(feature = "gdb")] + gdb_socket_path: None, + } } } /// Struct used in PATCH `/machine-config` API call. -/// Used to update `VmConfig` in `VmResources`. +/// Used to update `MachineConfig` in `VmResources`. 
/// This struct mirrors all the fields in `MachineConfig`. /// All fields are optional, but at least one needs to be specified. /// If a field is `Some(value)` then we assume an update is requested @@ -171,7 +213,7 @@ impl From for MachineConfigUpdate { vcpu_count: Some(cfg.vcpu_count), mem_size_mib: Some(cfg.mem_size_mib), smt: Some(cfg.smt), - cpu_template: cfg.cpu_template, + cpu_template: cfg.static_template(), track_dirty_pages: Some(cfg.track_dirty_pages), huge_pages: Some(cfg.huge_pages), #[cfg(feature = "gdb")] @@ -180,62 +222,52 @@ impl From for MachineConfigUpdate { } } -/// Configuration of the microvm. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct VmConfig { - /// Number of vcpu to start. - pub vcpu_count: u8, - /// The memory size in MiB. - pub mem_size_mib: usize, - /// Enables or disabled SMT. - pub smt: bool, - /// A CPU template that it is used to filter the CPU features exposed to the guest. - pub cpu_template: Option, - /// Enables or disables dirty page tracking. Enabling allows incremental snapshots. - pub track_dirty_pages: bool, - /// Configures what page size Firecracker should use to back guest memory. - pub huge_pages: HugePageConfig, - /// GDB socket address. - #[cfg(feature = "gdb")] - pub gdb_socket_path: Option, -} - -impl VmConfig { +impl MachineConfig { /// Sets cpu tempalte field to `CpuTemplateType::Custom(cpu_template)`. pub fn set_custom_cpu_template(&mut self, cpu_template: CustomCpuTemplate) { self.cpu_template = Some(CpuTemplateType::Custom(cpu_template)); } - /// Updates [`VmConfig`] with [`MachineConfigUpdate`]. + fn static_template(&self) -> Option { + match self.cpu_template { + Some(CpuTemplateType::Static(template)) => Some(template), + _ => None, + } + } + + /// Updates [`MachineConfig`] with [`MachineConfigUpdate`]. /// Mapping for cpu template update: /// StaticCpuTemplate::None -> None /// StaticCpuTemplate::Other -> Some(CustomCpuTemplate::Static(Other)), - /// Returns the updated `VmConfig` object. 
- pub fn update(&self, update: &MachineConfigUpdate) -> Result { + /// Returns the updated `MachineConfig` object. + pub fn update( + &self, + update: &MachineConfigUpdate, + ) -> Result { let vcpu_count = update.vcpu_count.unwrap_or(self.vcpu_count); let smt = update.smt.unwrap_or(self.smt); #[cfg(target_arch = "aarch64")] if smt { - return Err(VmConfigError::SmtNotSupported); + return Err(MachineConfigError::SmtNotSupported); } if vcpu_count == 0 || vcpu_count > MAX_SUPPORTED_VCPUS { - return Err(VmConfigError::InvalidVcpuCount); + return Err(MachineConfigError::InvalidVcpuCount); } // If SMT is enabled or is to be enabled in this call // only allow vcpu count to be 1 or even. if smt && vcpu_count > 1 && vcpu_count % 2 == 1 { - return Err(VmConfigError::InvalidVcpuCount); + return Err(MachineConfigError::InvalidVcpuCount); } let mem_size_mib = update.mem_size_mib.unwrap_or(self.mem_size_mib); let page_config = update.huge_pages.unwrap_or(self.huge_pages); if mem_size_mib == 0 || !page_config.is_valid_mem_size(mem_size_mib) { - return Err(VmConfigError::InvalidMemorySize); + return Err(MachineConfigError::InvalidMemorySize); } let cpu_template = match update.cpu_template { @@ -244,7 +276,7 @@ impl VmConfig { Some(other) => Some(CpuTemplateType::Static(other)), }; - Ok(VmConfig { + Ok(MachineConfig { vcpu_count, mem_size_mib, smt, @@ -257,32 +289,54 @@ impl VmConfig { } } -impl Default for VmConfig { - fn default() -> Self { - Self { - vcpu_count: 1, - mem_size_mib: DEFAULT_MEM_SIZE_MIB, - smt: false, +#[cfg(test)] +mod tests { + use crate::cpu_config::templates::{CpuTemplateType, CustomCpuTemplate, StaticCpuTemplate}; + use crate::vmm_config::machine_config::MachineConfig; + + // Ensure the special (de)serialization logic for the cpu_template field works: + // only static cpu templates can be specified via the machine-config endpoint, but + // we still cram custom cpu templates into the MachineConfig struct if they're set otherwise + // Ensure that during 
(de)serialization we preserve static templates, but we set custom + // templates to None + #[test] + fn test_serialize_machine_config() { + #[cfg(target_arch = "aarch64")] + const TEMPLATE: StaticCpuTemplate = StaticCpuTemplate::V1N1; + #[cfg(target_arch = "x86_64")] + const TEMPLATE: StaticCpuTemplate = StaticCpuTemplate::T2S; + + let mconfig = MachineConfig { cpu_template: None, - track_dirty_pages: false, - huge_pages: HugePageConfig::None, - #[cfg(feature = "gdb")] - gdb_socket_path: None, - } - } -} + ..Default::default() + }; -impl From<&VmConfig> for MachineConfig { - fn from(value: &VmConfig) -> Self { - Self { - vcpu_count: value.vcpu_count, - mem_size_mib: value.mem_size_mib, - smt: value.smt, - cpu_template: value.cpu_template.as_ref().map(|template| template.into()), - track_dirty_pages: value.track_dirty_pages, - huge_pages: value.huge_pages, - #[cfg(feature = "gdb")] - gdb_socket_path: value.gdb_socket_path.clone(), - } + let serialized = serde_json::to_string(&mconfig).unwrap(); + let deserialized = serde_json::from_str::(&serialized).unwrap(); + + assert!(deserialized.cpu_template.is_none()); + + let mconfig = MachineConfig { + cpu_template: Some(CpuTemplateType::Static(TEMPLATE)), + ..Default::default() + }; + + let serialized = serde_json::to_string(&mconfig).unwrap(); + let deserialized = serde_json::from_str::(&serialized).unwrap(); + + assert_eq!( + deserialized.cpu_template, + Some(CpuTemplateType::Static(TEMPLATE)) + ); + + let mconfig = MachineConfig { + cpu_template: Some(CpuTemplateType::Custom(CustomCpuTemplate::default())), + ..Default::default() + }; + + let serialized = serde_json::to_string(&mconfig).unwrap(); + let deserialized = serde_json::from_str::(&serialized).unwrap(); + + assert!(deserialized.cpu_template.is_none()); } } diff --git a/src/vmm/tests/integration_tests.rs b/src/vmm/tests/integration_tests.rs index 40eab05c4a4..a66f29e7f55 100644 --- a/src/vmm/tests/integration_tests.rs +++ b/src/vmm/tests/integration_tests.rs @@ 
-22,7 +22,7 @@ use vmm::vmm_config::balloon::BalloonDeviceConfig; use vmm::vmm_config::boot_source::BootSourceConfig; use vmm::vmm_config::drive::BlockDeviceConfig; use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; -use vmm::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate, VmConfig}; +use vmm::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate}; use vmm::vmm_config::net::NetworkInterfaceConfig; use vmm::vmm_config::snapshot::{ CreateSnapshotParams, LoadSnapshotParams, MemBackendConfig, MemBackendType, SnapshotType, @@ -188,7 +188,7 @@ fn verify_create_snapshot(is_diff: bool) -> (TempFile, TempFile) { let (vmm, _) = create_vmm(Some(NOISY_KERNEL_IMAGE), is_diff, true); let resources = VmResources { - vm_config: VmConfig { + machine_config: MachineConfig { mem_size_mib: 1, track_dirty_pages: is_diff, ..Default::default() @@ -403,6 +403,7 @@ fn test_preboot_load_snap_disallowed_after_boot_resources() { }); verify_load_snap_disallowed_after_boot_resources(req, "SetVsockDevice"); - let req = VmmAction::UpdateVmConfiguration(MachineConfigUpdate::from(MachineConfig::default())); + let req = + VmmAction::UpdateMachineConfiguration(MachineConfigUpdate::from(MachineConfig::default())); verify_load_snap_disallowed_after_boot_resources(req, "SetVmConfiguration"); } From 3fb06e940d8ad0508c31984cf0f8cb947282430b Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Fri, 10 Jan 2025 09:57:08 +0000 Subject: [PATCH 42/78] fix: avoid needless clone in build_microvm_from_snapshot There is no need to clone the GuestMemoryMmap here, as create_vmm_and_vcpus returns it again (as part of the Vmm object), and since later code in build_microvm_from_snapshot doesn't need to take ownership of the GuestMemoryMmap, we can just use references to this stored object, avoiding the clone. 
Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 433d1fd4ff3..999e27415fc 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -472,7 +472,7 @@ pub fn build_microvm_from_snapshot( let (mut vmm, mut vcpus) = create_vmm_and_vcpus( instance_info, event_manager, - guest_memory.clone(), + guest_memory, uffd, vm_resources.machine_config.track_dirty_pages, vm_resources.machine_config.vcpu_count, @@ -517,7 +517,7 @@ pub fn build_microvm_from_snapshot( // Restore devices states. let mmio_ctor_args = MMIODevManagerConstructorArgs { - mem: &guest_memory, + mem: &vmm.guest_memory, vm: vmm.vm.fd(), event_manager, resource_allocator: &mut vmm.resource_allocator, @@ -532,7 +532,7 @@ pub fn build_microvm_from_snapshot( { let acpi_ctor_args = ACPIDeviceManagerConstructorArgs { - mem: &guest_memory, + mem: &vmm.guest_memory, resource_allocator: &mut vmm.resource_allocator, vm: vmm.vm.fd(), }; From d71ff8a79e5de7fff659cd953e8a591a9b78ef19 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 10 Jan 2025 11:27:02 +0000 Subject: [PATCH 43/78] feat: reset KVM_REG_ARM_PTIMER_CNT on VM boot Reset KVM_REG_ARM_PTIMER_CNT physical counter register on VM boot to avoid passing through host physical counter. Note that resetting the register on VM boot does not guarantee that VM will see the counter value 0 at startup because there is a delta in time between register reset and VM boot during which counter continues to advance. In order to check if the kernel supports the counter reset we query KVM_CAP_COUNTER_OFFSET capability and only reset the KVM_REG_ARM_PTIMER_CNT if it is present. 
Signed-off-by: Egor Lazarchuk --- src/vmm/src/arch/aarch64/regs.rs | 6 +++++ src/vmm/src/arch/aarch64/vcpu.rs | 42 ++++++++++++++++++++++++++++-- src/vmm/src/builder.rs | 29 +++++++++++++++------ src/vmm/src/vstate/kvm.rs | 18 ++++++++++++- src/vmm/src/vstate/vcpu/aarch64.rs | 9 +++++-- src/vmm/src/vstate/vcpu/mod.rs | 1 + 6 files changed, 92 insertions(+), 13 deletions(-) diff --git a/src/vmm/src/arch/aarch64/regs.rs b/src/vmm/src/arch/aarch64/regs.rs index 5238f58ba70..d844fbfb56b 100644 --- a/src/vmm/src/arch/aarch64/regs.rs +++ b/src/vmm/src/arch/aarch64/regs.rs @@ -99,6 +99,12 @@ arm64_sys_reg!(SYS_CNTV_CVAL_EL0, 3, 3, 14, 3, 2); // https://elixir.bootlin.com/linux/v6.8/source/arch/arm64/include/asm/sysreg.h#L459 arm64_sys_reg!(SYS_CNTPCT_EL0, 3, 3, 14, 0, 1); +// Physical Timer EL0 count Register +// The id of this register is same as SYS_CNTPCT_EL0, but KVM defines it +// separately, so we do as well. +// https://elixir.bootlin.com/linux/v6.12.6/source/arch/arm64/include/uapi/asm/kvm.h#L259 +arm64_sys_reg!(KVM_REG_ARM_PTIMER_CNT, 3, 3, 14, 0, 1); + // Translation Table Base Register // https://developer.arm.com/documentation/ddi0595/2021-03/AArch64-Registers/TTBR1-EL1--Translation-Table-Base-Register-1--EL1- arm64_sys_reg!(TTBR1_EL1, 3, 0, 2, 0, 1); diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 859e2da2cb6..7b34ae91896 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -13,6 +13,7 @@ use kvm_ioctls::VcpuFd; use super::get_fdt_addr; use super::regs::*; +use crate::vstate::kvm::OptionalCapabilities; use crate::vstate::memory::GuestMemoryMmap; /// Errors thrown while setting aarch64 registers. 
@@ -78,6 +79,7 @@ pub fn setup_boot_regs( cpu_id: u8, boot_ip: u64, mem: &GuestMemoryMmap, + optional_capabilities: &OptionalCapabilities, ) -> Result<(), VcpuError> { let kreg_off = offset_of!(kvm_regs, regs); @@ -106,6 +108,23 @@ pub fn setup_boot_regs( vcpufd .set_one_reg(id, &get_fdt_addr(mem).to_le_bytes()) .map_err(|err| VcpuError::SetOneReg(id, err))?; + + // Reset the physical counter for the guest. This way we avoid guest reading + // host physical counter. + // Resetting KVM_REG_ARM_PTIMER_CNT for single vcpu is enough because there is only + // one timer struct with offsets per VM. + // Because the access to KVM_REG_ARM_PTIMER_CNT is only present starting 6.4 kernel, + // we only do the reset if KVM_CAP_COUNTER_OFFSET is present as it was added + // in the same patch series as the ability to set the KVM_REG_ARM_PTIMER_CNT register. + // Path series which introduced the needed changes: + // https://lore.kernel.org/all/20230330174800.2677007-1-maz@kernel.org/ + // Note: the value observed by the guest will still be above 0, because there is a delta + // time between this resetting and first call to KVM_RUN. 
+ if optional_capabilities.counter_offset { + vcpufd + .set_one_reg(KVM_REG_ARM_PTIMER_CNT, &[0; 8]) + .map_err(|err| VcpuError::SetOneReg(id, err))?; + } } Ok(()) } @@ -226,8 +245,9 @@ mod tests { let vm = kvm.fd.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); + let optional_capabilities = kvm.optional_capabilities(); - let res = setup_boot_regs(&vcpu, 0, 0x0, &mem); + let res = setup_boot_regs(&vcpu, 0, 0x0, &mem, &optional_capabilities); assert!(matches!( res.unwrap_err(), VcpuError::SetOneReg(0x6030000000100042, _) @@ -237,7 +257,25 @@ mod tests { vm.get_preferred_target(&mut kvi).unwrap(); vcpu.vcpu_init(&kvi).unwrap(); - setup_boot_regs(&vcpu, 0, 0x0, &mem).unwrap(); + setup_boot_regs(&vcpu, 0, 0x0, &mem, &optional_capabilities).unwrap(); + + // Check that the register is reset on compatible kernels. + // Because there is a delta in time between we reset the register and time we + // read it, we cannot compare with 0. Instead we compare it with meaningfully + // small value. + if optional_capabilities.counter_offset { + let mut reg_bytes = [0_u8; 8]; + vcpu.get_one_reg(SYS_CNTPCT_EL0, &mut reg_bytes).unwrap(); + let counter_value = u64::from_le_bytes(reg_bytes); + + // We are reading the SYS_CNTPCT_EL0 right after resetting it. + // If reset did happen successfully, the value should be quite small when we read it. + // If the reset did not happen, the value will be same as on the host and it surely + // will be more that MAX_VALUE. + let max_value = 1000; + + assert!(counter_value < max_value); + } } #[test] diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 6a59fe35a83..4b43e67541f 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -814,16 +814,16 @@ pub fn configure_system_for_boot( cpu_config, }; - // Configure vCPUs with normalizing and setting the generated CPU configuration. 
- for vcpu in vcpus.iter_mut() { - vcpu.kvm_vcpu - .configure(vmm.guest_memory(), entry_addr, &vcpu_config) - .map_err(VmmError::VcpuConfigure) - .map_err(Internal)?; - } - #[cfg(target_arch = "x86_64")] { + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .configure(vmm.guest_memory(), entry_addr, &vcpu_config) + .map_err(VmmError::VcpuConfigure) + .map_err(Internal)?; + } + // Write the kernel command line to guest memory. This is x86_64 specific, since on // aarch64 the command line will be specified through the FDT. let cmdline_size = boot_cmdline @@ -858,6 +858,19 @@ pub fn configure_system_for_boot( } #[cfg(target_arch = "aarch64")] { + let optional_capabilities = vmm.kvm.optional_capabilities(); + // Configure vCPUs with normalizing and setting the generated CPU configuration. + for vcpu in vcpus.iter_mut() { + vcpu.kvm_vcpu + .configure( + vmm.guest_memory(), + entry_addr, + &vcpu_config, + &optional_capabilities, + ) + .map_err(VmmError::VcpuConfigure) + .map_err(Internal)?; + } let vcpu_mpidr = vcpus .iter_mut() .map(|cpu| cpu.kvm_vcpu.get_mpidr()) diff --git a/src/vmm/src/vstate/kvm.rs b/src/vmm/src/vstate/kvm.rs index 985a9fae1b3..59b192dbe09 100644 --- a/src/vmm/src/vstate/kvm.rs +++ b/src/vmm/src/vstate/kvm.rs @@ -140,7 +140,13 @@ impl Kvm { } } } - +#[cfg(target_arch = "aarch64")] +/// Optional capabilities. +#[derive(Debug, Default)] +pub struct OptionalCapabilities { + /// KVM_CAP_COUNTER_OFFSET + pub counter_offset: bool, +} #[cfg(target_arch = "aarch64")] impl Kvm { const DEFAULT_CAPABILITIES: [u32; 7] = [ @@ -152,6 +158,16 @@ impl Kvm { kvm_bindings::KVM_CAP_MP_STATE, kvm_bindings::KVM_CAP_ONE_REG, ]; + + /// Returns struct with optional capabilities statuses. 
+ pub fn optional_capabilities(&self) -> OptionalCapabilities { + OptionalCapabilities { + counter_offset: self + .fd + .check_extension_raw(kvm_bindings::KVM_CAP_COUNTER_OFFSET.into()) + != 0, + } + } } #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/vstate/vcpu/aarch64.rs b/src/vmm/src/vstate/vcpu/aarch64.rs index 4097ef59044..9db1173eeb8 100644 --- a/src/vmm/src/vstate/vcpu/aarch64.rs +++ b/src/vmm/src/vstate/vcpu/aarch64.rs @@ -22,7 +22,7 @@ use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures; use crate::cpu_config::templates::CpuConfiguration; use crate::logger::{error, IncMetric, METRICS}; use crate::vcpu::{VcpuConfig, VcpuError}; -use crate::vstate::kvm::Kvm; +use crate::vstate::kvm::{Kvm, OptionalCapabilities}; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuEmulation; use crate::vstate::vm::Vm; @@ -116,6 +116,7 @@ impl KvmVcpu { guest_mem: &GuestMemoryMmap, kernel_load_addr: GuestAddress, vcpu_config: &VcpuConfig, + optional_capabilities: &OptionalCapabilities, ) -> Result<(), KvmVcpuError> { for reg in vcpu_config.cpu_config.regs.iter() { self.fd @@ -128,6 +129,7 @@ impl KvmVcpu { self.index, kernel_load_addr.raw_value(), guest_mem, + optional_capabilities, ) .map_err(KvmVcpuError::ConfigureRegisters)?; @@ -338,7 +340,8 @@ mod tests { #[test] fn test_configure_vcpu() { - let (_, _, mut vcpu, vm_mem) = setup_vcpu(0x10000); + let (kvm, _, mut vcpu, vm_mem) = setup_vcpu(0x10000); + let optional_capabilities = kvm.optional_capabilities(); let vcpu_config = VcpuConfig { vcpu_count: 1, @@ -349,6 +352,7 @@ mod tests { &vm_mem, GuestAddress(crate::arch::get_kernel_start()), &vcpu_config, + &optional_capabilities, ) .unwrap(); @@ -358,6 +362,7 @@ mod tests { &vm_mem, GuestAddress(crate::arch::get_kernel_start()), &vcpu_config, + &optional_capabilities, ); assert_eq!( err.unwrap_err(), diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 3d8877285a4..6a6471193dc 
100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -1008,6 +1008,7 @@ pub(crate) mod tests { smt: false, cpu_config: crate::cpu_config::aarch64::CpuConfiguration::default(), }, + &kvm.optional_capabilities(), ) .expect("failed to configure vcpu"); From 1ab8c96245780ae1aeb43b3d8ab005c56d6ec032 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 10 Jan 2025 17:19:39 +0000 Subject: [PATCH 44/78] chore: update prod-host-setup.md with arm physical counter info Update a note about physical counter on ARM being reset instead of directly passed through on kernels with `KVM_CAP_COUNTER_OFFSET` capability. Signed-off-by: Egor Lazarchuk --- docs/prod-host-setup.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/prod-host-setup.md b/docs/prod-host-setup.md index f046e31c735..cf0ac86cf1f 100644 --- a/docs/prod-host-setup.md +++ b/docs/prod-host-setup.md @@ -328,13 +328,16 @@ For vendor-specific recommendations, please consult the resources below: - ARM: [Speculative Processor Vulnerability](https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability) -##### [ARM only] Physical counter directly passed through to the guest +##### [ARM only] VM Physical counter behaviour -On ARM, the physical counter (i.e `CNTPCT`) it is returning the -[actual EL1 physical counter value of the host][1]. From the discussions before -merging this change [upstream][2], this seems like a conscious design decision -of the ARM code contributors, giving precedence to performance over the ability -to trap and control this in the hypervisor. +On ARM, Firecracker tries to reset the `CNTPCT` physical counter on VM boot. +This is done in order to prevent VM from reading host physical counter value. +Firecracker will only try to reset the counter if the host KVM contains +`KVM_CAP_COUNTER_OFFSET` capability. 
This capability is only present in kernels +containing +[this](https://lore.kernel.org/all/20230330174800.2677007-1-maz@kernel.org/) +patch series (starting from 6.4 and newer). For older kernels the counter value +will be passed through from the host. ##### Verification @@ -428,6 +431,3 @@ To validate that the change took effect, the file [^1]: Look for `GRUB_CMDLINE_LINUX` in file `/etc/default/grub` in RPM-based systems, and [this doc for Ubuntu](https://wiki.ubuntu.com/Kernel/KernelBootParameters). - -[1]: https://elixir.free-electrons.com/linux/v4.14.203/source/virt/kvm/arm/hyp/timer-sr.c#L63 -[2]: https://lists.cs.columbia.edu/pipermail/kvmarm/2017-January/023323.html From 44b55ddc84f6711ab004fbf7fc43f113d0cc65d2 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 10 Jan 2025 11:37:21 +0000 Subject: [PATCH 45/78] chore: add an entry to the CHANGELOG Add an entry about physical counter reset to the CHANGELOG. Signed-off-by: Egor Lazarchuk --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 138ccd20079..00e82ee4b19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,13 @@ and this project adheres to ### Added +- [#4987](https://github.com/firecracker-microvm/firecracker/pull/4987): Reset + physical counter register (`CNTPCT_EL0`) on VM startup. This avoids VM reading + the host physical counter value. This is only possible on 6.4 and newer + kernels. For older kernels physical counter will still be passed to the guest + unmodified. 
See more info + [here](https://github.com/firecracker-microvm/firecracker/blob/main/docs/prod-host-setup.md#arm-only-vm-physical-counter-behaviour) + ### Changed - [#4913](https://github.com/firecracker-microvm/firecracker/pull/4913): Removed From 0efae509cc815774934e67f658bad143841f7cdd Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 14 Jan 2025 17:32:43 +0000 Subject: [PATCH 46/78] refactor: move KVM related logic into a separate struct `Vm` constructor was the only place where the `/dev/kvm` was open and only there we could do any KVM (not VM) specific checks. By moving this KVM logic into a separate struct we can do KVM specific actions (like checking optional KVM capabilities) without needing to reopen the `/dev/kvm` again. Signed-off-by: Egor Lazarchuk --- src/vmm/src/arch/aarch64/vcpu.rs | 18 +-- src/vmm/src/builder.rs | 38 +++-- src/vmm/src/device_manager/legacy.rs | 7 +- src/vmm/src/device_manager/mmio.rs | 10 +- src/vmm/src/lib.rs | 6 + src/vmm/src/persist.rs | 4 + src/vmm/src/vstate/kvm.rs | 205 +++++++++++++++++++++++++++ src/vmm/src/vstate/mod.rs | 2 + src/vmm/src/vstate/vcpu/aarch64.rs | 46 +++--- src/vmm/src/vstate/vcpu/mod.rs | 34 ++--- src/vmm/src/vstate/vcpu/x86_64.rs | 77 +++++----- src/vmm/src/vstate/vm.rs | 198 ++++---------------------- 12 files changed, 372 insertions(+), 273 deletions(-) create mode 100644 src/vmm/src/vstate/kvm.rs diff --git a/src/vmm/src/arch/aarch64/vcpu.rs b/src/vmm/src/arch/aarch64/vcpu.rs index 80fc5a339df..859e2da2cb6 100644 --- a/src/vmm/src/arch/aarch64/vcpu.rs +++ b/src/vmm/src/arch/aarch64/vcpu.rs @@ -214,16 +214,16 @@ pub fn set_mpstate(vcpufd: &VcpuFd, state: kvm_mp_state) -> Result<(), VcpuError #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] - use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::layout; use crate::test_utils::arch_mem; + use crate::vstate::kvm::Kvm; #[test] fn test_setup_regs() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); +
let kvm = Kvm::new(vec![]).unwrap(); + let vm = kvm.fd.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); @@ -242,8 +242,8 @@ mod tests { #[test] fn test_read_mpidr() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); + let kvm = Kvm::new(vec![]).unwrap(); + let vm = kvm.fd.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); vm.get_preferred_target(&mut kvi).unwrap(); @@ -261,8 +261,8 @@ mod tests { #[test] fn test_get_set_regs() { - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); + let kvm = Kvm::new(vec![]).unwrap(); + let vm = kvm.fd.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); vm.get_preferred_target(&mut kvi).unwrap(); @@ -283,8 +283,8 @@ mod tests { fn test_mpstate() { use std::os::unix::io::AsRawFd; - let kvm = Kvm::new().unwrap(); - let vm = kvm.create_vm().unwrap(); + let kvm = Kvm::new(vec![]).unwrap(); + let vm = kvm.fd.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let mut kvi: kvm_bindings::kvm_vcpu_init = kvm_bindings::kvm_vcpu_init::default(); vm.get_preferred_target(&mut kvi).unwrap(); diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 999e27415fc..6a59fe35a83 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -68,6 +68,7 @@ use crate::utils::u64_to_usize; use crate::vmm_config::boot_source::BootConfig; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError}; +use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuError}; use crate::vstate::vm::Vm; @@ -160,11 +161,17 @@ fn create_vmm_and_vcpus( ) -> Result<(Vmm, Vec), StartMicrovmError> { use 
self::StartMicrovmError::*; + let kvm = Kvm::new(kvm_capabilities) + .map_err(VmmError::Kvm) + .map_err(StartMicrovmError::Internal)?; // Set up Kvm Vm and register memory regions. // Build custom CPU config if a custom template is provided. - let mut vm = Vm::new(kvm_capabilities) + let mut vm = Vm::new(&kvm) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; + kvm.check_memory(&guest_memory) + .map_err(VmmError::Kvm) + .map_err(StartMicrovmError::Internal)?; vm.memory_init(&guest_memory, track_dirty_pages) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; @@ -186,7 +193,7 @@ fn create_vmm_and_vcpus( #[cfg(target_arch = "x86_64")] let (vcpus, pio_device_manager) = { setup_interrupt_controller(&mut vm)?; - let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; + let vcpus = create_vcpus(&kvm, &vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; // Make stdout non blocking. set_stdout_nonblocking(); @@ -218,7 +225,7 @@ fn create_vmm_and_vcpus( // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. 
#[cfg(target_arch = "aarch64")] let vcpus = { - let vcpus = create_vcpus(&vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; + let vcpus = create_vcpus(&kvm, &vm, vcpu_count, &vcpus_exit_evt).map_err(Internal)?; setup_interrupt_controller(&mut vm, vcpu_count)?; vcpus }; @@ -227,6 +234,7 @@ fn create_vmm_and_vcpus( events_observer: Some(std::io::stdin()), instance_info: instance_info.clone(), shutdown_exit_code: None, + kvm, vm, guest_memory, uffd, @@ -476,7 +484,7 @@ pub fn build_microvm_from_snapshot( uffd, vm_resources.machine_config.track_dirty_pages, vm_resources.machine_config.vcpu_count, - microvm_state.vm_state.kvm_cap_modifiers.clone(), + microvm_state.kvm_state.kvm_cap_modifiers.clone(), )?; #[cfg(target_arch = "x86_64")] @@ -738,11 +746,16 @@ fn attach_legacy_devices_aarch64( .map_err(VmmError::RegisterMMIODevice) } -fn create_vcpus(vm: &Vm, vcpu_count: u8, exit_evt: &EventFd) -> Result, VmmError> { +fn create_vcpus( + kvm: &Kvm, + vm: &Vm, + vcpu_count: u8, + exit_evt: &EventFd, +) -> Result, VmmError> { let mut vcpus = Vec::with_capacity(vcpu_count as usize); for cpu_idx in 0..vcpu_count { let exit_evt = exit_evt.try_clone().map_err(VmmError::EventFd)?; - let vcpu = Vcpu::new(cpu_idx, vm, exit_evt).map_err(VmmError::VcpuCreate)?; + let vcpu = Vcpu::new(cpu_idx, vm, kvm, exit_evt).map_err(VmmError::VcpuCreate)?; vcpus.push(vcpu); } Ok(vcpus) @@ -765,7 +778,7 @@ pub fn configure_system_for_boot( #[cfg(target_arch = "x86_64")] let cpu_config = { use crate::cpu_config::x86_64::cpuid; - let cpuid = cpuid::Cpuid::try_from(vmm.vm.supported_cpuid().clone()) + let cpuid = cpuid::Cpuid::try_from(vmm.kvm.supported_cpuid.clone()) .map_err(GuestConfigError::CpuidFromKvmCpuid)?; let msrs = vcpus[0] .kvm_vcpu @@ -1111,7 +1124,8 @@ pub(crate) mod tests { .map_err(StartMicrovmError::Internal) .unwrap(); - let mut vm = Vm::new(vec![]).unwrap(); + let kvm = Kvm::new(vec![]).unwrap(); + let mut vm = Vm::new(&kvm).unwrap(); vm.memory_init(&guest_memory, false).unwrap(); 
let mmio_device_manager = MMIODeviceManager::new(); let acpi_device_manager = ACPIDeviceManager::new(); @@ -1137,7 +1151,7 @@ pub(crate) mod tests { #[cfg(target_arch = "aarch64")] { let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); - let _vcpu = Vcpu::new(1, &vm, exit_evt).unwrap(); + let _vcpu = Vcpu::new(1, &vm, &kvm, exit_evt).unwrap(); setup_interrupt_controller(&mut vm, 1).unwrap(); } @@ -1145,6 +1159,7 @@ pub(crate) mod tests { events_observer: Some(std::io::stdin()), instance_info: InstanceInfo::default(), shutdown_exit_code: None, + kvm, vm, guest_memory, uffd: None, @@ -1362,15 +1377,16 @@ pub(crate) mod tests { let vcpu_count = 2; let guest_memory = arch_mem(128 << 20); + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); #[allow(unused_mut)] - let mut vm = Vm::new(vec![]).unwrap(); + let mut vm = Vm::new(&kvm).unwrap(); vm.memory_init(&guest_memory, false).unwrap(); let evfd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); #[cfg(target_arch = "x86_64")] setup_interrupt_controller(&mut vm).unwrap(); - let vcpu_vec = create_vcpus(&vm, vcpu_count, &evfd).unwrap(); + let vcpu_vec = create_vcpus(&kvm, &vm, vcpu_count, &evfd).unwrap(); assert_eq!(vcpu_vec.len(), vcpu_count as usize); } diff --git a/src/vmm/src/device_manager/legacy.rs b/src/vmm/src/device_manager/legacy.rs index 45842d933b2..8526d3c2901 100644 --- a/src/vmm/src/device_manager/legacy.rs +++ b/src/vmm/src/device_manager/legacy.rs @@ -244,14 +244,11 @@ impl PortIODeviceManager { #[cfg(test)] mod tests { use super::*; - use crate::test_utils::single_region_mem; - use crate::Vm; + use crate::vstate::vm::tests::setup_vm_with_memory; #[test] fn test_register_legacy_devices() { - let guest_mem = single_region_mem(0x1000); - let mut vm = Vm::new(vec![]).unwrap(); - vm.memory_init(&guest_mem, false).unwrap(); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); crate::builder::setup_interrupt_controller(&mut vm).unwrap(); let mut ldm = PortIODeviceManager::new( 
Arc::new(Mutex::new(BusDevice::Serial(SerialDevice { diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 00c155abcfd..635bc1bc6e0 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -544,6 +544,7 @@ mod tests { use crate::devices::virtio::queue::Queue; use crate::devices::virtio::ActivateError; use crate::test_utils::multi_region_mem; + use crate::vstate::kvm::Kvm; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::{builder, Vm}; @@ -661,7 +662,8 @@ mod tests { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); - let mut vm = Vm::new(vec![]).unwrap(); + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + let mut vm = Vm::new(&kvm).unwrap(); vm.memory_init(&guest_mem, false).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); @@ -690,7 +692,8 @@ mod tests { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); - let mut vm = Vm::new(vec![]).unwrap(); + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + let mut vm = Vm::new(&kvm).unwrap(); vm.memory_init(&guest_mem, false).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); @@ -744,7 +747,8 @@ mod tests { let start_addr1 = GuestAddress(0x0); let start_addr2 = GuestAddress(0x1000); let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); - let mut vm = Vm::new(vec![]).unwrap(); + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + let mut vm = Vm::new(&kvm).unwrap(); vm.memory_init(&guest_mem, false).unwrap(); let mem_clone = guest_mem.clone(); diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 
77c0018c55a..6833a3a12d2 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -127,6 +127,7 @@ use userfaultfd::Uffd; use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::terminal::Terminal; +use vstate::kvm::Kvm; use vstate::vcpu::{self, KvmVcpuConfigureError, StartThreadedError, VcpuSendEventError}; use crate::arch::DeviceType; @@ -255,6 +256,8 @@ pub enum VmmError { VcpuSpawn(io::Error), /// Vm error: {0} Vm(vstate::vm::VmError), + /// Kvm error: {0} + Kvm(vstate::kvm::KvmError), /// Error thrown by observer object on Vmm initialization: {0} VmmObserverInit(vmm_sys_util::errno::Error), /// Error thrown by observer object on Vmm teardown: {0} @@ -307,6 +310,7 @@ pub struct Vmm { shutdown_exit_code: Option, // Guest VM core resources. + kvm: Kvm, vm: Vm, guest_memory: GuestMemoryMmap, // Save UFFD in order to keep it open in the Firecracker process, as well. @@ -511,6 +515,7 @@ impl Vmm { pub fn save_state(&mut self, vm_info: &VmInfo) -> Result { use self::MicrovmStateError::SaveVmState; let vcpu_states = self.save_vcpu_states()?; + let kvm_state = self.kvm.save_state(); let vm_state = { #[cfg(target_arch = "x86_64")] { @@ -531,6 +536,7 @@ impl Vmm { Ok(MicrovmState { vm_info: vm_info.clone(), memory_state, + kvm_state, vm_state, vcpu_states, device_states, diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 621d95d1e87..c9aadad10a9 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -36,6 +36,7 @@ use crate::vmm_config::machine_config::{HugePageConfig, MachineConfigError, Mach use crate::vmm_config::snapshot::{ CreateSnapshotParams, LoadSnapshotParams, MemBackendType, SnapshotType, }; +use crate::vstate::kvm::KvmState; use crate::vstate::memory::{ GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryState, MemoryError, }; @@ -77,6 +78,8 @@ pub struct MicrovmState { pub vm_info: VmInfo, /// Memory state. pub memory_state: GuestMemoryState, + /// KVM KVM state. 
+ pub kvm_state: KvmState, /// VM KVM state. pub vm_state: VmState, /// Vcpu states. @@ -736,6 +739,7 @@ mod tests { device_states: states, memory_state, vcpu_states, + kvm_state: Default::default(), vm_info: VmInfo { mem_size_mib: 1u64, ..Default::default() diff --git a/src/vmm/src/vstate/kvm.rs b/src/vmm/src/vstate/kvm.rs new file mode 100644 index 00000000000..985a9fae1b3 --- /dev/null +++ b/src/vmm/src/vstate/kvm.rs @@ -0,0 +1,205 @@ +// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use kvm_bindings::KVM_API_VERSION; +#[cfg(target_arch = "x86_64")] +use kvm_bindings::{CpuId, MsrList, KVM_MAX_CPUID_ENTRIES}; +use kvm_ioctls::Kvm as KvmFd; +use serde::{Deserialize, Serialize}; + +use crate::cpu_config::templates::KvmCapability; +use crate::vstate::memory::{GuestMemory, GuestMemoryMmap}; + +/// Errors associated with the wrappers over KVM ioctls. +/// Needs `rustfmt::skip` to make multiline comments work +#[rustfmt::skip] +#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] +pub enum KvmError { + /// The host kernel reports an invalid KVM API version: {0} + ApiVersion(i32), + /// Missing KVM capabilities: {0:#x?} + Capabilities(u32), + /** Error creating KVM object: {0} Make sure the user launching the firecracker process is \ + configured on the /dev/kvm file's ACL. */ + Kvm(kvm_ioctls::Error), + #[cfg(target_arch = "x86_64")] + /// Failed to get MSR index list to save into snapshots: {0} + GetMsrsToSave(crate::arch::x86_64::msr::MsrError), + #[cfg(target_arch = "x86_64")] + /// Failed to get supported cpuid: {0} + GetSupportedCpuId(kvm_ioctls::Error), + /// The number of configured slots is bigger than the maximum reported by KVM + NotEnoughMemorySlots, +} + +/// Struct with kvm fd and kvm associated paramenters. +#[derive(Debug)] +pub struct Kvm { + /// KVM fd. + pub fd: KvmFd, + /// Maximum number of memory slots allowed by KVM. 
+ pub max_memslots: usize, + /// Additional capabilities that were specified in cpu template. + pub kvm_cap_modifiers: Vec, + + #[cfg(target_arch = "x86_64")] + /// Supported CpuIds. + pub supported_cpuid: CpuId, + #[cfg(target_arch = "x86_64")] + /// Msrs needed to be saved on snapshot creation. + pub msrs_to_save: MsrList, +} + +impl Kvm { + /// Create `Kvm` struct. + pub fn new(kvm_cap_modifiers: Vec) -> Result { + let kvm_fd = KvmFd::new().map_err(KvmError::Kvm)?; + + // Check that KVM has the correct version. + // Safe to cast because this is a constant. + #[allow(clippy::cast_possible_wrap)] + if kvm_fd.get_api_version() != KVM_API_VERSION as i32 { + return Err(KvmError::ApiVersion(kvm_fd.get_api_version())); + } + + let total_caps = Self::combine_capabilities(&kvm_cap_modifiers); + // Check that all desired capabilities are supported. + Self::check_capabilities(&kvm_fd, &total_caps).map_err(KvmError::Capabilities)?; + + let max_memslots = kvm_fd.get_nr_memslots(); + + #[cfg(target_arch = "aarch64")] + { + Ok(Self { + fd: kvm_fd, + max_memslots, + kvm_cap_modifiers, + }) + } + + #[cfg(target_arch = "x86_64")] + { + let supported_cpuid = kvm_fd + .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) + .map_err(KvmError::GetSupportedCpuId)?; + let msrs_to_save = crate::arch::x86_64::msr::get_msrs_to_save(&kvm_fd) + .map_err(KvmError::GetMsrsToSave)?; + + Ok(Kvm { + fd: kvm_fd, + max_memslots, + kvm_cap_modifiers, + supported_cpuid, + msrs_to_save, + }) + } + } + + /// Check guest memory does not have more regions than kvm allows. 
+ pub fn check_memory(&self, guest_mem: &GuestMemoryMmap) -> Result<(), KvmError> { + if guest_mem.num_regions() > self.max_memslots { + Err(KvmError::NotEnoughMemorySlots) + } else { + Ok(()) + } + } + + fn combine_capabilities(kvm_cap_modifiers: &[KvmCapability]) -> Vec { + let mut total_caps = Self::DEFAULT_CAPABILITIES.to_vec(); + for modifier in kvm_cap_modifiers.iter() { + match modifier { + KvmCapability::Add(cap) => { + if !total_caps.contains(cap) { + total_caps.push(*cap); + } + } + KvmCapability::Remove(cap) => { + if let Some(pos) = total_caps.iter().position(|c| c == cap) { + total_caps.swap_remove(pos); + } + } + } + } + total_caps + } + + fn check_capabilities(kvm_fd: &KvmFd, capabilities: &[u32]) -> Result<(), u32> { + for cap in capabilities { + // If capability is not supported kernel will return 0. + if kvm_fd.check_extension_raw(u64::from(*cap)) == 0 { + return Err(*cap); + } + } + Ok(()) + } + + /// Saves and returns the Kvm state. + pub fn save_state(&self) -> KvmState { + KvmState { + kvm_cap_modifiers: self.kvm_cap_modifiers.clone(), + } + } +} + +#[cfg(target_arch = "aarch64")] +impl Kvm { + const DEFAULT_CAPABILITIES: [u32; 7] = [ + kvm_bindings::KVM_CAP_IOEVENTFD, + kvm_bindings::KVM_CAP_IRQFD, + kvm_bindings::KVM_CAP_USER_MEMORY, + kvm_bindings::KVM_CAP_ARM_PSCI_0_2, + kvm_bindings::KVM_CAP_DEVICE_CTRL, + kvm_bindings::KVM_CAP_MP_STATE, + kvm_bindings::KVM_CAP_ONE_REG, + ]; +} + +#[cfg(target_arch = "x86_64")] +impl Kvm { + const DEFAULT_CAPABILITIES: [u32; 14] = [ + kvm_bindings::KVM_CAP_IRQCHIP, + kvm_bindings::KVM_CAP_IOEVENTFD, + kvm_bindings::KVM_CAP_IRQFD, + kvm_bindings::KVM_CAP_USER_MEMORY, + kvm_bindings::KVM_CAP_SET_TSS_ADDR, + kvm_bindings::KVM_CAP_PIT2, + kvm_bindings::KVM_CAP_PIT_STATE2, + kvm_bindings::KVM_CAP_ADJUST_CLOCK, + kvm_bindings::KVM_CAP_DEBUGREGS, + kvm_bindings::KVM_CAP_MP_STATE, + kvm_bindings::KVM_CAP_VCPU_EVENTS, + kvm_bindings::KVM_CAP_XCRS, + kvm_bindings::KVM_CAP_XSAVE, + kvm_bindings::KVM_CAP_EXT_CPUID, + 
]; +} + +/// Structure holding an general specific VM state. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct KvmState { + /// Additional capabilities that were specified in cpu template. + pub kvm_cap_modifiers: Vec, +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + + #[test] + fn test_combine_capabilities() { + // Default caps for x86_64 and aarch64 both have KVM_CAP_IOEVENTFD and don't have + // KVM_CAP_IOMMU caps. + let additional_capabilities = vec![ + KvmCapability::Add(kvm_bindings::KVM_CAP_IOMMU), + KvmCapability::Remove(kvm_bindings::KVM_CAP_IOEVENTFD), + ]; + + let combined_caps = Kvm::combine_capabilities(&additional_capabilities); + assert!(combined_caps + .iter() + .any(|c| *c == kvm_bindings::KVM_CAP_IOMMU)); + assert!(!combined_caps + .iter() + .any(|c| *c == kvm_bindings::KVM_CAP_IOEVENTFD)); + } +} diff --git a/src/vmm/src/vstate/mod.rs b/src/vmm/src/vstate/mod.rs index 32d7bd7ea7f..47458835e04 100644 --- a/src/vmm/src/vstate/mod.rs +++ b/src/vmm/src/vstate/mod.rs @@ -1,6 +1,8 @@ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 +/// Module with Kvm implementation. +pub mod kvm; /// Module with GuestMemory implementation. pub mod memory; /// Module with Vcpu implementation. 
diff --git a/src/vmm/src/vstate/vcpu/aarch64.rs b/src/vmm/src/vstate/vcpu/aarch64.rs index 4e006f196d0..4097ef59044 100644 --- a/src/vmm/src/vstate/vcpu/aarch64.rs +++ b/src/vmm/src/vstate/vcpu/aarch64.rs @@ -22,6 +22,7 @@ use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures; use crate::cpu_config::templates::CpuConfiguration; use crate::logger::{error, IncMetric, METRICS}; use crate::vcpu::{VcpuConfig, VcpuError}; +use crate::vstate::kvm::Kvm; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuEmulation; use crate::vstate::vm::Vm; @@ -77,7 +78,7 @@ impl KvmVcpu { /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. - pub fn new(index: u8, vm: &Vm) -> Result { + pub fn new(index: u8, vm: &Vm, _: &Kvm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) @@ -305,26 +306,27 @@ mod tests { use crate::cpu_config::aarch64::CpuConfiguration; use crate::cpu_config::templates::RegisterValueFilter; use crate::vcpu::VcpuConfig; + use crate::vstate::kvm::Kvm; use crate::vstate::memory::GuestMemoryMmap; - use crate::vstate::vm::tests::setup_vm; + use crate::vstate::vm::tests::setup_vm_with_memory; use crate::vstate::vm::Vm; - fn setup_vcpu(mem_size: usize) -> (Vm, KvmVcpu, GuestMemoryMmap) { - let (mut vm, vm_mem) = setup_vm(mem_size); - let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); + fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu, GuestMemoryMmap) { + let (kvm, mut vm, vm_mem) = setup_vm_with_memory(mem_size); + let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); vcpu.init(&[]).unwrap(); vm.setup_irqchip(1).unwrap(); - (vm, vcpu, vm_mem) + (kvm, vm, vcpu, vm_mem) } #[test] fn test_create_vcpu() { - let (vm, _) = setup_vm(0x1000); + let (kvm, vm, _) = setup_vm_with_memory(0x1000); unsafe { libc::close(vm.fd().as_raw_fd()) }; - let err = KvmVcpu::new(0, &vm); + let err = KvmVcpu::new(0, &vm, &kvm); assert_eq!( 
err.err().unwrap().to_string(), "Error creating vcpu: Bad file descriptor (os error 9)".to_string() @@ -336,7 +338,7 @@ mod tests { #[test] fn test_configure_vcpu() { - let (_vm, mut vcpu, vm_mem) = setup_vcpu(0x10000); + let (_, _, mut vcpu, vm_mem) = setup_vcpu(0x10000); let vcpu_config = VcpuConfig { vcpu_count: 1, @@ -371,8 +373,8 @@ mod tests { #[test] fn test_init_vcpu() { - let (mut vm, _vm_mem) = setup_vm(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); + let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); vm.setup_irqchip(1).unwrap(); // KVM_ARM_VCPU_PSCI_0_2 is set by default. @@ -390,8 +392,8 @@ mod tests { #[test] fn test_vcpu_save_restore_state() { - let (mut vm, _vm_mem) = setup_vm(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); + let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); vm.setup_irqchip(1).unwrap(); // Calling KVM_GET_REGLIST before KVM_VCPU_INIT will result in error. @@ -434,8 +436,8 @@ mod tests { // // This should fail with ENOEXEC. // https://elixir.bootlin.com/linux/v5.10.176/source/arch/arm64/kvm/arm.c#L1165 - let (mut vm, _vm_mem) = setup_vm(0x1000); - let vcpu = KvmVcpu::new(0, &vm).unwrap(); + let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); + let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu.dump_cpu_config().unwrap_err(); @@ -444,8 +446,8 @@ mod tests { #[test] fn test_dump_cpu_config_after_init() { // Test `dump_cpu_config()` after `KVM_VCPU_INIT`. 
- let (mut vm, _vm_mem) = setup_vm(0x1000); - let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); + let (kvm, mut vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu.init(&[]).unwrap(); @@ -454,10 +456,10 @@ mod tests { #[test] fn test_setup_non_boot_vcpu() { - let (vm, _) = setup_vm(0x1000); - let mut vcpu1 = KvmVcpu::new(0, &vm).unwrap(); + let (kvm, vm, _) = setup_vm_with_memory(0x1000); + let mut vcpu1 = KvmVcpu::new(0, &vm, &kvm).unwrap(); vcpu1.init(&[]).unwrap(); - let mut vcpu2 = KvmVcpu::new(1, &vm).unwrap(); + let mut vcpu2 = KvmVcpu::new(1, &vm, &kvm).unwrap(); vcpu2.init(&[]).unwrap(); } @@ -466,7 +468,7 @@ mod tests { // Test `get_regs()` with valid register IDs. // - X0: 0x6030 0000 0010 0000 // - X1: 0x6030 0000 0010 0002 - let (_, vcpu, _) = setup_vcpu(0x10000); + let (_, _, vcpu, _) = setup_vcpu(0x10000); let reg_list = Vec::::from([0x6030000000100000, 0x6030000000100002]); get_registers(&vcpu.fd, ®_list, &mut Aarch64RegisterVec::default()).unwrap(); } @@ -474,7 +476,7 @@ mod tests { #[test] fn test_get_invalid_regs() { // Test `get_regs()` with invalid register IDs. - let (_, vcpu, _) = setup_vcpu(0x10000); + let (_, _, vcpu, _) = setup_vcpu(0x10000); let reg_list = Vec::::from([0x6030000000100001, 0x6030000000100003]); get_registers(&vcpu.fd, ®_list, &mut Aarch64RegisterVec::default()).unwrap_err(); } diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index ddfeda21b4c..3d8877285a4 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -44,6 +44,8 @@ pub use aarch64::{KvmVcpuError, *}; #[cfg(target_arch = "x86_64")] pub use x86_64::{KvmVcpuError, *}; +use super::kvm::Kvm; + /// Signal number (SIGRTMIN) used to kick Vcpus. pub const VCPU_RTSIG_OFFSET: i32 = 0; @@ -212,10 +214,10 @@ impl Vcpu { /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. 
/// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits. - pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result { + pub fn new(index: u8, vm: &Vm, kvm: &Kvm, exit_evt: EventFd) -> Result { let (event_sender, event_receiver) = channel(); let (response_sender, response_receiver) = channel(); - let kvm_vcpu = KvmVcpu::new(index, vm).unwrap(); + let kvm_vcpu = KvmVcpu::new(index, vm, kvm).unwrap(); Ok(Vcpu { exit_evt, @@ -777,13 +779,13 @@ pub(crate) mod tests { use crate::utils::signal::validate_signal_num; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuError as EmulationError; - use crate::vstate::vm::tests::setup_vm; + use crate::vstate::vm::tests::setup_vm_with_memory; use crate::vstate::vm::Vm; use crate::RECV_TIMEOUT_SEC; #[test] fn test_handle_kvm_exit() { - let (_vm, mut vcpu, _vm_mem) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _vm_mem) = setup_vcpu(0x1000); let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Hlt)); assert_eq!(res.unwrap(), VcpuEmulation::Stopped); @@ -918,14 +920,14 @@ pub(crate) mod tests { // Auxiliary function being used throughout the tests. 
#[allow(unused_mut)] - pub(crate) fn setup_vcpu(mem_size: usize) -> (Vm, Vcpu, GuestMemoryMmap) { - let (mut vm, gm) = setup_vm(mem_size); + pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu, GuestMemoryMmap) { + let (kvm, mut vm, gm) = setup_vm_with_memory(mem_size); let exit_evt = EventFd::new(libc::EFD_NONBLOCK).unwrap(); #[cfg(target_arch = "aarch64")] let vcpu = { - let mut vcpu = Vcpu::new(1, &vm, exit_evt).unwrap(); + let mut vcpu = Vcpu::new(1, &vm, &kvm, exit_evt).unwrap(); vcpu.kvm_vcpu.init(&[]).unwrap(); vm.setup_irqchip(1).unwrap(); vcpu @@ -933,9 +935,9 @@ pub(crate) mod tests { #[cfg(target_arch = "x86_64")] let vcpu = { vm.setup_irqchip().unwrap(); - Vcpu::new(1, &vm, exit_evt).unwrap() + Vcpu::new(1, &vm, &kvm, exit_evt).unwrap() }; - (vm, vcpu, gm) + (kvm, vm, vcpu, gm) } fn load_good_kernel(vm_memory: &GuestMemoryMmap) -> GuestAddress { @@ -970,7 +972,7 @@ pub(crate) mod tests { Vcpu::register_kick_signal_handler(); // Need enough mem to boot linux. let mem_size = 64 << 20; - let (_vm, mut vcpu, vm_mem) = setup_vcpu(mem_size); + let (kvm, _, mut vcpu, vm_mem) = setup_vcpu(mem_size); let vcpu_exit_evt = vcpu.exit_evt.try_clone().unwrap(); @@ -988,7 +990,7 @@ pub(crate) mod tests { vcpu_count: 1, smt: false, cpu_config: CpuConfiguration { - cpuid: Cpuid::try_from(_vm.supported_cpuid().clone()).unwrap(), + cpuid: Cpuid::try_from(kvm.supported_cpuid.clone()).unwrap(), msrs: BTreeMap::new(), }, }, @@ -1022,7 +1024,7 @@ pub(crate) mod tests { #[test] fn test_set_mmio_bus() { - let (_, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _) = setup_vcpu(0x1000); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_none()); vcpu.set_mmio_bus(crate::devices::Bus::new()); assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some()); @@ -1030,7 +1032,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_tls() { - let (_, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _) = setup_vcpu(0x1000); // Running on the TLS vcpu should fail before we actually 
initialize it. unsafe { @@ -1061,7 +1063,7 @@ pub(crate) mod tests { #[test] fn test_invalid_tls() { - let (_, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _) = setup_vcpu(0x1000); // Initialize vcpu TLS. vcpu.init_thread_local_data().unwrap(); // Trying to initialize non-empty TLS should error. @@ -1071,7 +1073,7 @@ pub(crate) mod tests { #[test] fn test_vcpu_kick() { Vcpu::register_kick_signal_handler(); - let (vm, mut vcpu, _) = setup_vcpu(0x1000); + let (_, vm, mut vcpu, _) = setup_vcpu(0x1000); let mut kvm_run = kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size()) @@ -1126,7 +1128,7 @@ pub(crate) mod tests { #[test] fn test_immediate_exit_shortcircuits_execution() { - let (_vm, mut vcpu, _) = setup_vcpu(0x1000); + let (_, _, mut vcpu, _) = setup_vcpu(0x1000); vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1); // Set a dummy value to be returned by the emulate call diff --git a/src/vmm/src/vstate/vcpu/x86_64.rs b/src/vmm/src/vstate/vcpu/x86_64.rs index 4043691130d..39ff0879ee8 100644 --- a/src/vmm/src/vstate/vcpu/x86_64.rs +++ b/src/vmm/src/vstate/vcpu/x86_64.rs @@ -23,6 +23,7 @@ use crate::arch::x86_64::msr::{create_boot_msr_entries, MsrError}; use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError}; use crate::cpu_config::x86_64::{cpuid, CpuConfiguration}; use crate::logger::{IncMetric, METRICS}; +use crate::vstate::kvm::Kvm; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation}; use crate::vstate::vm::Vm; @@ -164,7 +165,7 @@ impl KvmVcpu { /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. 
- pub fn new(index: u8, vm: &Vm) -> Result { + pub fn new(index: u8, vm: &Vm, kvm: &Kvm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) @@ -174,7 +175,7 @@ impl KvmVcpu { index, fd: kvm_vcpu, peripherals: Default::default(), - msrs_to_save: vm.msrs_to_save().as_slice().to_vec(), + msrs_to_save: kvm.msrs_to_save.as_slice().to_vec(), }) } @@ -716,7 +717,7 @@ mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use kvm_bindings::kvm_msr_entry; - use kvm_ioctls::{Cap, Kvm}; + use kvm_ioctls::Cap; use super::*; use crate::arch::x86_64::cpu_model::CpuModel; @@ -725,7 +726,7 @@ mod tests { StaticCpuTemplate, }; use crate::cpu_config::x86_64::cpuid::{Cpuid, CpuidEntry, CpuidKey}; - use crate::vstate::vm::tests::setup_vm; + use crate::vstate::vm::tests::{setup_vm, setup_vm_with_memory}; use crate::vstate::vm::Vm; impl Default for VcpuState { @@ -746,11 +747,11 @@ mod tests { } } - fn setup_vcpu(mem_size: usize) -> (Vm, KvmVcpu, GuestMemoryMmap) { - let (vm, vm_mem) = setup_vm(mem_size); + fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu, GuestMemoryMmap) { + let (kvm, vm, vm_mem) = setup_vm_with_memory(mem_size); vm.setup_irqchip().unwrap(); - let vcpu = KvmVcpu::new(0, &vm).unwrap(); - (vm, vcpu, vm_mem) + let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); + (kvm, vm, vcpu, vm_mem) } fn is_at_least_cascade_lake() -> bool { @@ -765,11 +766,11 @@ mod tests { } fn create_vcpu_config( - vm: &Vm, + kvm: &Kvm, vcpu: &KvmVcpu, template: &CustomCpuTemplate, ) -> Result { - let cpuid = Cpuid::try_from(vm.supported_cpuid().clone()) + let cpuid = Cpuid::try_from(kvm.supported_cpuid.clone()) .map_err(GuestConfigError::CpuidFromKvmCpuid)?; let msrs = vcpu .get_msrs(template.msr_index_iter()) @@ -785,19 +786,19 @@ mod tests { #[test] fn test_configure_vcpu() { - let (vm, mut vcpu, vm_mem) = setup_vcpu(0x10000); + let (kvm, _, mut vcpu, vm_mem) = setup_vcpu(0x10000); - let vcpu_config = create_vcpu_config(&vm, &vcpu, &CustomCpuTemplate::default()).unwrap(); + let 
vcpu_config = create_vcpu_config(&kvm, &vcpu, &CustomCpuTemplate::default()).unwrap(); assert_eq!( vcpu.configure(&vm_mem, GuestAddress(0), &vcpu_config,), Ok(()) ); - let try_configure = |vm: &Vm, vcpu: &mut KvmVcpu, template| -> bool { + let try_configure = |kvm: &Kvm, vcpu: &mut KvmVcpu, template| -> bool { let cpu_template = Some(CpuTemplateType::Static(template)); let template = cpu_template.get_cpu_template(); match template { - Ok(template) => match create_vcpu_config(vm, vcpu, &template) { + Ok(template) => match create_vcpu_config(kvm, vcpu, &template) { Ok(config) => vcpu .configure( &vm_mem, @@ -812,19 +813,19 @@ mod tests { }; // Test configure while using the T2 template. - let t2_res = try_configure(&vm, &mut vcpu, StaticCpuTemplate::T2); + let t2_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2); // Test configure while using the C3 template. - let c3_res = try_configure(&vm, &mut vcpu, StaticCpuTemplate::C3); + let c3_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::C3); // Test configure while using the T2S template. - let t2s_res = try_configure(&vm, &mut vcpu, StaticCpuTemplate::T2S); + let t2s_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2S); // Test configure while using the T2CL template. - let t2cl_res = try_configure(&vm, &mut vcpu, StaticCpuTemplate::T2CL); + let t2cl_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2CL); // Test configure while using the T2S template. - let t2a_res = try_configure(&vm, &mut vcpu, StaticCpuTemplate::T2A); + let t2a_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2A); match &cpuid::common::get_vendor_id_from_host().unwrap() { cpuid::VENDOR_ID_INTEL => { @@ -857,8 +858,8 @@ mod tests { #[test] fn test_vcpu_cpuid_restore() { - let (vm, vcpu, _mem) = setup_vcpu(0x10000); - vcpu.fd.set_cpuid2(vm.supported_cpuid()).unwrap(); + let (kvm, _, vcpu, _mem) = setup_vcpu(0x10000); + vcpu.fd.set_cpuid2(&kvm.supported_cpuid).unwrap(); // Mutate the CPUID. 
// Leaf 0x3 / EAX that is an unused (reserved to be accurate) register, so it's harmless. @@ -875,7 +876,7 @@ mod tests { drop(vcpu); // Restore the state into a new vcpu. - let (_vm, vcpu, _mem) = setup_vcpu(0x10000); + let (_, _vm, vcpu, _mem) = setup_vcpu(0x10000); let result2 = vcpu.restore_state(&state); assert!(result2.is_ok(), "{}", result2.unwrap_err()); @@ -895,12 +896,12 @@ mod tests { #[test] fn test_empty_cpuid_entries_removed() { // Test that `get_cpuid()` removes zeroed empty entries from the `KVM_GET_CPUID2` result. - let (vm, mut vcpu, vm_mem) = setup_vcpu(0x10000); + let (kvm, _, mut vcpu, vm_mem) = setup_vcpu(0x10000); let vcpu_config = VcpuConfig { vcpu_count: 1, smt: false, cpu_config: CpuConfiguration { - cpuid: Cpuid::try_from(vm.supported_cpuid().clone()).unwrap(), + cpuid: Cpuid::try_from(kvm.supported_cpuid.clone()).unwrap(), msrs: BTreeMap::new(), }, }; @@ -946,7 +947,7 @@ mod tests { // Since `KVM_SET_CPUID2` has not been called before vcpu configuration, all leaves should // be filled with zero. Therefore, `KvmVcpu::dump_cpu_config()` should fail with CPUID type // conversion error due to the lack of brand string info in leaf 0x0. - let (_, vcpu, _) = setup_vcpu(0x10000); + let (_, _, vcpu, _) = setup_vcpu(0x10000); match vcpu.dump_cpu_config() { Err(KvmVcpuError::ConvertCpuidType(_)) => (), Err(err) => panic!("Unexpected error: {err}"), @@ -957,12 +958,12 @@ mod tests { #[test] fn test_dump_cpu_config_with_configured_vcpu() { // Test `dump_cpu_config()` after vcpu configuration. 
- let (vm, mut vcpu, vm_mem) = setup_vcpu(0x10000); + let (kvm, _, mut vcpu, vm_mem) = setup_vcpu(0x10000); let vcpu_config = VcpuConfig { vcpu_count: 1, smt: false, cpu_config: CpuConfiguration { - cpuid: Cpuid::try_from(vm.supported_cpuid().clone()).unwrap(), + cpuid: Cpuid::try_from(kvm.supported_cpuid.clone()).unwrap(), msrs: BTreeMap::new(), }, }; @@ -976,7 +977,7 @@ mod tests { fn test_is_tsc_scaling_required() { // Test `is_tsc_scaling_required` as if it were on the same // CPU model as the one in the snapshot state. - let (_vm, vcpu, _) = setup_vcpu(0x1000); + let (_, _, vcpu, _) = setup_vcpu(0x1000); { // The frequency difference is within tolerance. @@ -1015,7 +1016,7 @@ mod tests { #[test] fn test_set_tsc() { - let (vm, vcpu, _) = setup_vcpu(0x1000); + let (kvm, _, vcpu, _) = setup_vcpu(0x1000); let mut state = vcpu.save_state().unwrap(); state.tsc_khz = Some( state.tsc_khz.unwrap() @@ -1024,9 +1025,9 @@ mod tests { * 2, ); - if vm.fd().check_extension(Cap::TscControl) { + if kvm.fd.check_extension(Cap::TscControl) { vcpu.set_tsc_khz(state.tsc_khz.unwrap()).unwrap(); - if vm.fd().check_extension(Cap::GetTscKhz) { + if kvm.fd.check_extension(Cap::GetTscKhz) { assert_eq!(vcpu.get_tsc_khz().ok(), state.tsc_khz); } else { vcpu.get_tsc_khz().unwrap_err(); @@ -1040,7 +1041,7 @@ mod tests { fn test_get_msrs_with_msrs_to_save() { // Test `get_msrs()` with the MSR indices that should be serialized into snapshots. // The MSR indices should be valid and this test should succeed. - let (_, vcpu, _) = setup_vcpu(0x1000); + let (_, _, vcpu, _) = setup_vcpu(0x1000); vcpu.get_msrs(vcpu.msrs_to_save.iter().copied()).unwrap(); } @@ -1048,7 +1049,7 @@ mod tests { fn test_get_msrs_with_msrs_to_dump() { // Test `get_msrs()` with the MSR indices that should be dumped. // All the MSR indices should be valid and the call should succeed. 
- let (_, vcpu, _) = setup_vcpu(0x1000); + let (_, _, vcpu, _) = setup_vcpu(0x1000); let kvm = kvm_ioctls::Kvm::new().unwrap(); let msrs_to_dump = crate::arch::x86_64::msr::get_msrs_to_dump(&kvm).unwrap(); @@ -1061,7 +1062,7 @@ mod tests { // Test `get_msrs()` with unsupported MSR indices. This should return `VcpuGetMsr` error // that happens when `KVM_GET_MSRS` fails to populate MSR values in the middle and exits. // Currently, MSR indices 2..=4 are not listed as supported MSRs. - let (_, vcpu, _) = setup_vcpu(0x1000); + let (_, _, vcpu, _) = setup_vcpu(0x1000); let msr_index_list: Vec = vec![2, 3, 4]; match vcpu.get_msrs(msr_index_list.iter().copied()) { Err(KvmVcpuError::VcpuGetMsr(_)) => (), @@ -1167,13 +1168,11 @@ mod tests { #[test] fn test_get_msr_chunks_preserved_order() { // Regression test for #4666 - - let kvm = Kvm::new().unwrap(); - let vm = Vm::new(Vec::new()).unwrap(); - let vcpu = KvmVcpu::new(0, &vm).unwrap(); + let (kvm, vm) = setup_vm(); + let vcpu = KvmVcpu::new(0, &vm, &kvm).unwrap(); // The list of supported MSR indices, in the order they were returned by KVM - let msrs_to_save = crate::arch::x86_64::msr::get_msrs_to_save(&kvm).unwrap(); + let msrs_to_save = kvm.msrs_to_save; // The MSRs after processing. The order should be identical to the one returned by KVM, with // the exception of deferred MSRs, which should be moved to the end (but show up in the same // order as they are listed in [`DEFERRED_MSRS`]. 
diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index d213d4d7bb6..1bcf191b8b9 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -10,21 +10,21 @@ use std::fmt; #[cfg(target_arch = "x86_64")] use kvm_bindings::{ - kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, CpuId, MsrList, - KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, - KVM_MAX_CPUID_ENTRIES, KVM_PIT_SPEAKER_DUMMY, + kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, KVM_CLOCK_TSC_STABLE, + KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, }; -use kvm_bindings::{kvm_userspace_memory_region, KVM_API_VERSION, KVM_MEM_LOG_DIRTY_PAGES}; -use kvm_ioctls::{Kvm, VmFd}; +use kvm_bindings::{kvm_userspace_memory_region, KVM_MEM_LOG_DIRTY_PAGES}; +// use kvm_ioctls::{Kvm, VmFd}; +use kvm_ioctls::VmFd; use serde::{Deserialize, Serialize}; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::gic::GICDevice; #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::gic::GicState; -use crate::cpu_config::templates::KvmCapability; #[cfg(target_arch = "x86_64")] use crate::utils::u64_to_usize; +use crate::vstate::kvm::Kvm; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; /// Errors associated with the wrappers over KVM ioctls. @@ -32,18 +32,6 @@ use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestMemoryRe #[rustfmt::skip] #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum VmError { - /// The host kernel reports an invalid KVM API version: {0} - ApiVersion(i32), - /// Missing KVM capabilities: {0:#x?} - Capabilities(u32), - /** Error creating KVM object: {0} Make sure the user launching the firecracker process is \ - configured on the /dev/kvm file's ACL. 
*/ - Kvm(kvm_ioctls::Error), - #[cfg(target_arch = "x86_64")] - /// Failed to get MSR index list to save into snapshots: {0} - GetMsrsToSave(#[from] crate::arch::x86_64::msr::MsrError), - /// The number of configured slots is bigger than the maximum reported by KVM - NotEnoughMemorySlots, /// Cannot set the memory regions: {0} SetUserMemoryRegion(kvm_ioctls::Error), #[cfg(target_arch = "aarch64")] @@ -112,16 +100,6 @@ pub enum RestoreStateError { #[derive(Debug)] pub struct Vm { fd: VmFd, - max_memslots: usize, - - /// Additional capabilities that were specified in cpu template. - pub kvm_cap_modifiers: Vec, - - // X86 specific fields. - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - supported_cpuid: CpuId, - #[cfg(target_arch = "x86_64")] - msrs_to_save: MsrList, // Arm specific fields. // On aarch64 we need to keep around the fd obtained by creating the VGIC device. @@ -131,79 +109,23 @@ pub struct Vm { /// Contains Vm functions that are usable across CPU architectures impl Vm { - /// Constructs a new `Vm` using the given `Kvm` instance. - pub fn new(kvm_cap_modifiers: Vec) -> Result { - let kvm = Kvm::new().map_err(VmError::Kvm)?; - - // Check that KVM has the correct version. - // Safe to cast because this is a constant. - #[allow(clippy::cast_possible_wrap)] - if kvm.get_api_version() != KVM_API_VERSION as i32 { - return Err(VmError::ApiVersion(kvm.get_api_version())); - } - - let total_caps = Self::combine_capabilities(&kvm_cap_modifiers); - // Check that all desired capabilities are supported. - Self::check_capabilities(&kvm, &total_caps).map_err(VmError::Capabilities)?; - - let max_memslots = kvm.get_nr_memslots(); + /// Create a new `Vm` struct. + pub fn new(kvm: &Kvm) -> Result { // Create fd for interacting with kvm-vm specific functions. 
- let vm_fd = kvm.create_vm().map_err(VmError::VmFd)?; + let vm_fd = kvm.fd.create_vm().map_err(VmError::VmFd)?; #[cfg(target_arch = "aarch64")] { Ok(Vm { fd: vm_fd, - max_memslots, - kvm_cap_modifiers, irqchip_handle: None, }) } #[cfg(target_arch = "x86_64")] { - let supported_cpuid = kvm - .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) - .map_err(VmError::VmFd)?; - let msrs_to_save = crate::arch::x86_64::msr::get_msrs_to_save(&kvm)?; - - Ok(Vm { - fd: vm_fd, - max_memslots, - kvm_cap_modifiers, - supported_cpuid, - msrs_to_save, - }) - } - } - - fn combine_capabilities(kvm_cap_modifiers: &[KvmCapability]) -> Vec { - let mut total_caps = Self::DEFAULT_CAPABILITIES.to_vec(); - for modifier in kvm_cap_modifiers.iter() { - match modifier { - KvmCapability::Add(cap) => { - if !total_caps.iter().any(|c| c == cap) { - total_caps.push(*cap); - } - } - KvmCapability::Remove(cap) => { - if let Some(pos) = total_caps.iter().position(|c| c == cap) { - total_caps.remove(pos); - } - } - } + Ok(Vm { fd: vm_fd }) } - total_caps - } - - fn check_capabilities(kvm: &Kvm, capabilities: &[u32]) -> Result<(), u32> { - for cap in capabilities { - // If capability is not supported kernel will return 0. - if kvm.check_extension_raw(u64::from(*cap)) == 0 { - return Err(*cap); - } - } - Ok(()) } /// Initializes the guest memory. 
@@ -212,9 +134,6 @@ impl Vm { guest_mem: &GuestMemoryMmap, track_dirty_pages: bool, ) -> Result<(), VmError> { - if guest_mem.num_regions() > self.max_memslots { - return Err(VmError::NotEnoughMemorySlots); - } self.set_kvm_memory_regions(guest_mem, track_dirty_pages)?; #[cfg(target_arch = "x86_64")] self.fd @@ -261,16 +180,6 @@ impl Vm { #[cfg(target_arch = "aarch64")] impl Vm { - const DEFAULT_CAPABILITIES: [u32; 7] = [ - kvm_bindings::KVM_CAP_IOEVENTFD, - kvm_bindings::KVM_CAP_IRQFD, - kvm_bindings::KVM_CAP_USER_MEMORY, - kvm_bindings::KVM_CAP_ARM_PSCI_0_2, - kvm_bindings::KVM_CAP_DEVICE_CTRL, - kvm_bindings::KVM_CAP_MP_STATE, - kvm_bindings::KVM_CAP_ONE_REG, - ]; - /// Creates the GIC (Global Interrupt Controller). pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), VmError> { self.irqchip_handle = Some( @@ -292,7 +201,6 @@ impl Vm { .get_irqchip() .save_device(mpidrs) .map_err(VmError::SaveGic)?, - kvm_cap_modifiers: self.kvm_cap_modifiers.clone(), }) } @@ -319,39 +227,10 @@ impl Vm { pub struct VmState { /// GIC state. pub gic: GicState, - /// Additional capabilities that were specified in cpu template. - pub kvm_cap_modifiers: Vec, } #[cfg(target_arch = "x86_64")] impl Vm { - const DEFAULT_CAPABILITIES: [u32; 14] = [ - kvm_bindings::KVM_CAP_IRQCHIP, - kvm_bindings::KVM_CAP_IOEVENTFD, - kvm_bindings::KVM_CAP_IRQFD, - kvm_bindings::KVM_CAP_USER_MEMORY, - kvm_bindings::KVM_CAP_SET_TSS_ADDR, - kvm_bindings::KVM_CAP_PIT2, - kvm_bindings::KVM_CAP_PIT_STATE2, - kvm_bindings::KVM_CAP_ADJUST_CLOCK, - kvm_bindings::KVM_CAP_DEBUGREGS, - kvm_bindings::KVM_CAP_MP_STATE, - kvm_bindings::KVM_CAP_VCPU_EVENTS, - kvm_bindings::KVM_CAP_XCRS, - kvm_bindings::KVM_CAP_XSAVE, - kvm_bindings::KVM_CAP_EXT_CPUID, - ]; - - /// Returns a ref to the supported `CpuId` for this Vm. - pub fn supported_cpuid(&self) -> &CpuId { - &self.supported_cpuid - } - - /// Returns a ref to the list of serializable MSR indices. 
- pub fn msrs_to_save(&self) -> &MsrList { - &self.msrs_to_save - } - /// Restores the KVM VM state. /// /// # Errors @@ -431,7 +310,6 @@ impl Vm { pic_master, pic_slave, ioapic, - kvm_cap_modifiers: self.kvm_cap_modifiers.clone(), }) } } @@ -447,9 +325,6 @@ pub struct VmState { // TODO: rename this field to adopt inclusive language once Linux updates it, too. pic_slave: kvm_irqchip, ioapic: kvm_irqchip, - - /// Additional capabilities that were specified in cpu template. - pub kvm_cap_modifiers: Vec, } #[cfg(target_arch = "x86_64")] @@ -474,43 +349,30 @@ pub(crate) mod tests { use crate::vstate::memory::GuestMemoryMmap; // Auxiliary function being used throughout the tests. - pub(crate) fn setup_vm(mem_size: usize) -> (Vm, GuestMemoryMmap) { - let gm = single_region_mem(mem_size); + pub(crate) fn setup_vm() -> (Kvm, Vm) { + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + let vm = Vm::new(&kvm).expect("Cannot create new vm"); + (kvm, vm) + } - let vm = Vm::new(vec![]).expect("Cannot create new vm"); + // Auxiliary function being used throughout the tests. + pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm, GuestMemoryMmap) { + let (kvm, vm) = setup_vm(); + let gm = single_region_mem(mem_size); vm.memory_init(&gm, false).unwrap(); - - (vm, gm) + (kvm, vm, gm) } #[test] fn test_new() { // Testing with a valid /dev/kvm descriptor. - Vm::new(vec![]).unwrap(); - } - - #[test] - fn test_combine_capabilities() { - // Default caps for x86_64 and aarch64 both have KVM_CAP_IOEVENTFD and don't have - // KVM_CAP_IOMMU caps. 
- let additional_capabilities = vec![ - KvmCapability::Add(kvm_bindings::KVM_CAP_IOMMU), - KvmCapability::Remove(kvm_bindings::KVM_CAP_IOEVENTFD), - ]; - - let combined_caps = Vm::combine_capabilities(&additional_capabilities); - assert!(combined_caps - .iter() - .any(|c| *c == kvm_bindings::KVM_CAP_IOMMU)); - assert!(!combined_caps - .iter() - .any(|c| *c == kvm_bindings::KVM_CAP_IOEVENTFD)); + let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); + Vm::new(&kvm).unwrap(); } #[test] fn test_vm_memory_init() { - let vm = Vm::new(vec![]).expect("Cannot create new vm"); - + let (_, vm) = setup_vm(); // Create valid memory region and test that the initialization is successful. let gm = single_region_mem(0x1000); vm.memory_init(&gm, true).unwrap(); @@ -519,11 +381,11 @@ pub(crate) mod tests { #[cfg(target_arch = "x86_64")] #[test] fn test_vm_save_restore_state() { - let vm = Vm::new(vec![]).expect("new vm failed"); + let (_, vm) = setup_vm(); // Irqchips, clock and pitstate are not configured so trying to save state should fail. 
vm.save_state().unwrap_err(); - let (vm, _mem) = setup_vm(0x1000); + let (_, vm, _mem) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); let vm_state = vm.save_state().unwrap(); @@ -536,7 +398,7 @@ pub(crate) mod tests { assert_eq!(vm_state.pic_slave.chip_id, KVM_IRQCHIP_PIC_SLAVE); assert_eq!(vm_state.ioapic.chip_id, KVM_IRQCHIP_IOAPIC); - let (mut vm, _mem) = setup_vm(0x1000); + let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); vm.restore_state(&vm_state).unwrap(); @@ -547,11 +409,11 @@ pub(crate) mod tests { fn test_vm_save_restore_state_bad_irqchip() { use kvm_bindings::KVM_NR_IRQCHIPS; - let (vm, _mem) = setup_vm(0x1000); + let (_, vm, _mem) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); let mut vm_state = vm.save_state().unwrap(); - let (mut vm, _mem) = setup_vm(0x1000); + let (_, mut vm, _mem) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); // Try to restore an invalid PIC Master chip ID @@ -576,7 +438,7 @@ pub(crate) mod tests { fn test_vmstate_serde() { let mut snapshot_data = vec![0u8; 10000]; - let (mut vm, _) = setup_vm(0x1000); + let (_, mut vm, _) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); let state = vm.save_state().unwrap(); Snapshot::serialize(&mut snapshot_data.as_mut_slice(), &state).unwrap(); @@ -587,7 +449,7 @@ pub(crate) mod tests { #[test] fn test_set_kvm_memory_regions() { - let vm = Vm::new(vec![]).expect("Cannot create new vm"); + let (_, vm) = setup_vm(); let gm = single_region_mem(0x1000); let res = vm.set_kvm_memory_regions(&gm, false); From 525e68639d18aa59820380179774a5489495f60a Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Mon, 13 Jan 2025 17:37:52 +0000 Subject: [PATCH 47/78] test: add integration test for physical counter reset Add a test to verify the reset of the physical counter on aarch64 VMs. To do this we check registers saved in the snapshot and verify the counter value is less than some reasonably small number we choose. 
The value is based on the observation of how much cycles it takes for a VM to boot and be snapshotted. The idea is that this value will always be smaller than the actual physical counter on the host. Signed-off-by: Egor Lazarchuk --- .../functional/test_snapshot_basic.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/integration_tests/functional/test_snapshot_basic.py b/tests/integration_tests/functional/test_snapshot_basic.py index 4030bb4e981..875ef77dbaf 100644 --- a/tests/integration_tests/functional/test_snapshot_basic.py +++ b/tests/integration_tests/functional/test_snapshot_basic.py @@ -5,6 +5,7 @@ import filecmp import logging import os +import platform import re import shutil import time @@ -12,8 +13,11 @@ import pytest +import host_tools.cargo_build as host import host_tools.drive as drive_tools +from framework import utils from framework.microvm import SnapshotType +from framework.properties import global_props from framework.utils import check_filesystem, check_output from framework.utils_vsock import ( ECHO_SERVER_PORT, @@ -540,3 +544,56 @@ def test_vmgenid(guest_kernel_linux_6_1, rootfs, microvm_factory, snapshot_type) # Update the base for next iteration base_snapshot = snapshot + + +# TODO add `global_props.host_os == "amzn2"` condition +# once amazon linux kernels have patches. +@pytest.mark.skipif( + platform.machine() != "aarch64" or global_props.host_linux_version_tpl < (6, 4), + reason="This is aarch64 specific test and should only be run on 6.4 and later kernels", +) +def test_physical_couter_reset_aarch64(uvm_nano): + """ + Test that the CNTPCT_EL0 register is reset on VM boot. + We assume the smallest VM will not consume more than + some MAX_VALUE cycles to be created and snapshotted. + The MAX_VALUE is selected by doing a manual run of this test and + seeing what the actual counter value is. 
The assumption here is that + if resetting will not occur the guest counter value will be huge as it + will be a copy of host value. The host value in its turn will be huge because + it will include host OS boot + CI prep + other CI tests ... + """ + vm = uvm_nano + vm.add_net_iface() + vm.start() + + snapshot = vm.snapshot_full() + vm.kill() + snap_editor = host.get_binary("snapshot-editor") + + cntpct_el0 = hex(0x603000000013DF01) + # If a CPU runs at 3GHz, it will have a counter value of 1_000_000_000 + # in 1/3 of a second. The host surely will run for more than 1/3 second before + # executing this test. + max_value = 800_000_000 + + cmd = [ + str(snap_editor), + "info-vmstate", + "vcpu-states", + "--vmstate-path", + str(snapshot.vmstate), + ] + _, stdout, _ = utils.check_output(cmd) + + # The output will look like this: + # kvm_mp_state: 0x0 + # mpidr: 0x80000000 + # 0x6030000000100000 0x0000000e0 + # 0x6030000000100002 0xffff00fe33c0 + for line in stdout.splitlines(): + parts = line.split() + if len(parts) == 2: + reg_id, reg_value = parts + if reg_id == cntpct_el0: + assert int(reg_value, 16) < max_value From da1cc8a8cd5ff8c8edd569dd656257c68ecbc157 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 17 Jan 2025 12:45:56 +0000 Subject: [PATCH 48/78] feat: more descriptive panic messages in uffd examples Add additional info to the panic messages in uffd examples to aid with potential errors. 
Signed-off-by: Egor Lazarchuk --- src/firecracker/examples/uffd/uffd_utils.rs | 25 ++++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/firecracker/examples/uffd/uffd_utils.rs b/src/firecracker/examples/uffd/uffd_utils.rs index 37aa63c62a3..8cc70ab7c21 100644 --- a/src/firecracker/examples/uffd/uffd_utils.rs +++ b/src/firecracker/examples/uffd/uffd_utils.rs @@ -61,17 +61,30 @@ impl UffdHandler { let mut message_buf = vec![0u8; 1024]; let (bytes_read, file) = stream .recv_with_fd(&mut message_buf[..]) - .expect("Cannot recv_with_fd"); + .expect("Cannot read from a stream"); message_buf.resize(bytes_read, 0); - let body = String::from_utf8(message_buf).unwrap(); - let file = file.expect("Uffd not passed through UDS!"); + let body = String::from_utf8(message_buf.clone()).unwrap_or_else(|_| { + panic!( + "Received body is not a utf-8 valid string. Raw bytes received: {message_buf:#?}" + ) + }); + let file = + file.unwrap_or_else(|| panic!("Did not receive Uffd from UDS. Received body: {body}")); - let mappings = serde_json::from_str::>(&body) - .expect("Cannot deserialize memory mappings."); + let mappings = + serde_json::from_str::>(&body).unwrap_or_else(|_| { + panic!("Cannot deserialize memory mappings. Received body: {body}") + }); let memsize: usize = mappings.iter().map(|r| r.size).sum(); // Page size is the same for all memory regions, so just grab the first one - let page_size = mappings.first().unwrap().page_size_kib; + let first_mapping = mappings.first().unwrap_or_else(|| { + panic!( + "Cannot get the first mapping. Mappings size is {}. Received body: {body}", + mappings.len() + ) + }); + let page_size = first_mapping.page_size_kib; // Make sure memory size matches backing data size. 
assert_eq!(memsize, size); From 2b3ec744655e9dae39dd195812787e4bf1a46e69 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Wed, 15 Jan 2025 11:40:21 +0000 Subject: [PATCH 49/78] docs: Update documentation on balloon device Add notes around known limitations of the balloon device Signed-off-by: Jack Thomson --- docs/ballooning.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/ballooning.md b/docs/ballooning.md index f5a19271265..f2953d630b5 100644 --- a/docs/ballooning.md +++ b/docs/ballooning.md @@ -263,3 +263,20 @@ cannot be enabled later by providing a `polling_interval` non-zero value. Furthermore, if the balloon was configured with statistics pre-boot through a non-zero `stats_polling_interval_s` value, the statistics cannot be disabled through a `polling_interval` value of zero post-boot. + +## Balloon Caveats + +- Firecracker has no control over the speed of inflation or deflation; this is + dictated by the guest kernel driver. + +- The balloon will continually attempt to reach its target size, which can be a + CPU-intensive process. It is therefore recommended to set realistic targets + or, after a period of stagnation in the inflation, update the target size to + be close to the inflated size. + +- The `deflate_on_oom` flag is a mechanism to prevent the guest from crashing or + terminating processes; it is not meant to be used continually to free memory. + Doing this will be a CPU-intensive process, as the balloon driver is designed + to deflate and release memory slowly. This is also compounded if the balloon + has yet to reach its target size, as it will attempt to inflate while also + deflating. 
From 994198b0f4fa72837e2521520f21f9103e2803ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 20 Jan 2025 14:25:29 +0100 Subject: [PATCH 50/78] tests: wait for /dev/ptp0 to appear MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have sometimes seen that this test can fail. In theory it could happen that udev has not created the /dev/ptp0 file before we run the ssh command. So let's wait in udev for the device to appear up to 5 seconds. Signed-off-by: Pablo Barbáchano --- tests/integration_tests/functional/test_kvm_ptp.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/integration_tests/functional/test_kvm_ptp.py b/tests/integration_tests/functional/test_kvm_ptp.py index 70b5bb877bc..4b44ca124eb 100644 --- a/tests/integration_tests/functional/test_kvm_ptp.py +++ b/tests/integration_tests/functional/test_kvm_ptp.py @@ -6,19 +6,18 @@ import pytest -def test_kvm_ptp(uvm_plain_any): +def test_kvm_ptp(uvm_any_booted): """Test kvm_ptp is usable""" - vm = uvm_plain_any + vm = uvm_any_booted if vm.guest_kernel_version[:2] < (6, 1): pytest.skip("Only supported in kernel 6.1 and after") - vm.spawn() - vm.basic_config(vcpu_count=2, mem_size_mib=256) - vm.add_net_iface() - vm.start() + _, dmesg, _ = vm.ssh.check_output("dmesg |grep -i ptp") + assert "PTP clock support registered" in dmesg - vm.ssh.check_output("[ -c /dev/ptp0 ]") + # wait up to 5s to see the PTP device + vm.ssh.check_output("udevadm wait -t 5 /dev/ptp0") # phc_ctl[14515.127]: clock time is 1697545854.728335694 or Tue Oct 17 12:30:54 2023 vm.ssh.check_output("phc_ctl /dev/ptp0 -- get") From c0a5c517758049d68c3cc084819c5f3605b58643 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Wed, 30 Oct 2024 16:14:33 +0100 Subject: [PATCH 51/78] tests: make doctests work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ./tools/devtool -y test -- 
--doctest-modules framework Signed-off-by: Pablo Barbáchano --- tests/framework/properties.py | 4 +- tests/framework/utils.py | 4 +- tests/framework/utils_imdsv2.py | 7 ++- tests/host_tools/udp_offload.py | 64 +++++++++++----------- tests/integration_tests/style/test_rust.py | 10 +--- 5 files changed, 43 insertions(+), 46 deletions(-) diff --git a/tests/framework/properties.py b/tests/framework/properties.py index 7072a2ff3ca..c7c9dfe789d 100644 --- a/tests/framework/properties.py +++ b/tests/framework/properties.py @@ -26,7 +26,7 @@ def get_os_version(): """Get the OS version >>> get_os_version() - Ubuntu 18.04.6 LTS + 'Ubuntu 24.04.1 LTS' """ os_release = Path("/etc/os-release").read_text(encoding="ascii") @@ -41,7 +41,7 @@ def get_host_os(kv: str = None): This only works for AL2 and AL2023 >>> get_host_os("6.1.41-63.118.amzn2023.x86_64") - amzn2023 + 'amzn2023' """ if kv is None: kv = platform.release() diff --git a/tests/framework/utils.py b/tests/framework/utils.py index 8bb1d8c744e..e89b8706651 100644 --- a/tests/framework/utils.py +++ b/tests/framework/utils.py @@ -660,8 +660,8 @@ class Timeout: """ A Context Manager to timeout sections of code. - >>> with Timeout(30): - >>> time.sleep(35) + >>> with Timeout(30): # doctest: +SKIP + ... time.sleep(35) # doctest: +SKIP """ def __init__(self, seconds, msg="Timed out"): diff --git a/tests/framework/utils_imdsv2.py b/tests/framework/utils_imdsv2.py index 6420f7f9acf..7a6a89b58a5 100644 --- a/tests/framework/utils_imdsv2.py +++ b/tests/framework/utils_imdsv2.py @@ -24,7 +24,8 @@ class IMDSv2Client: """ A simple IMDSv2 client. - >>> IMDSv2Client().get("/meta-data/instance-type") + >>> IMDSv2Client().get("/meta-data/instance-type") # doctest: +SKIP + ... 
""" def __init__(self, endpoint="http://169.254.169.254", version="latest"): @@ -49,8 +50,8 @@ def get(self, path): """ Get a metadata path from IMDSv2 - >>> IMDSv2Client().get("/meta-data/instance-type") - >>> m5d.metal + >>> IMDSv2Client().get("/meta-data/instance-type") # doctest: +SKIP + 'm5d.metal' """ headers = {IMDSV2_HDR_TOKEN: self.get_token()} url = f"{self.endpoint}/{self.version}{path}" diff --git a/tests/host_tools/udp_offload.py b/tests/host_tools/udp_offload.py index e9ab6a93966..e105c8e08bd 100644 --- a/tests/host_tools/udp_offload.py +++ b/tests/host_tools/udp_offload.py @@ -24,35 +24,37 @@ def eprint(*args, **kwargs): SOL_UDP = 17 # Protocol number for UDP UDP_SEGMENT = 103 # Option code for UDP segmentation (non-standard) -# Get the IP and port from command-line arguments -if len(sys.argv) != 3: - eprint("Usage: python3 udp_offload.py ") - sys.exit(1) -ip_address = sys.argv[1] -port = int(sys.argv[2]) - -# Create a UDP socket -sockfd = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - -# Set the UDP segmentation option (UDP_SEGMENT) to 1400 bytes -OPTVAL = 1400 -try: - sockfd.setsockopt(SOL_UDP, UDP_SEGMENT, OPTVAL) -except (AttributeError, PermissionError): - eprint("Unable to set UDP_SEGMENT option") - sys.exit(1) - -# Set the destination address and port -servaddr = (ip_address, port) - -# Send the message to the destination address -MESSAGE = b"x" -try: - sockfd.sendto(MESSAGE, servaddr) - print("Message sent successfully") -except socket.error as e: - eprint(f"Error sending message: {e}") - sys.exit(1) - -sockfd.close() +if __name__ == "__main__": + # Get the IP and port from command-line arguments + if len(sys.argv) != 3: + eprint("Usage: python3 udp_offload.py ") + sys.exit(1) + + ip_address = sys.argv[1] + port = int(sys.argv[2]) + + # Create a UDP socket + sockfd = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + + # Set the UDP segmentation option (UDP_SEGMENT) to 1400 bytes + OPTVAL = 1400 + try: + sockfd.setsockopt(SOL_UDP, 
UDP_SEGMENT, OPTVAL) + except (AttributeError, PermissionError): + eprint("Unable to set UDP_SEGMENT option") + sys.exit(1) + + # Set the destination address and port + servaddr = (ip_address, port) + + # Send the message to the destination address + MESSAGE = b"x" + try: + sockfd.sendto(MESSAGE, servaddr) + print("Message sent successfully") + except socket.error as e: + eprint(f"Error sending message: {e}") + sys.exit(1) + + sockfd.close() diff --git a/tests/integration_tests/style/test_rust.py b/tests/integration_tests/style/test_rust.py index 295f2f209f1..580c33eb03d 100644 --- a/tests/integration_tests/style/test_rust.py +++ b/tests/integration_tests/style/test_rust.py @@ -6,11 +6,7 @@ def test_rust_order(): - """ - Tests that `Cargo.toml` dependencies are alphabetically ordered. - - @type: style - """ + """Tests that `Cargo.toml` dependencies are alphabetically ordered.""" # Runs `cargo-sort` with the current working directory (`cwd`) as the repository root. _, _, _ = utils.check_output( @@ -19,9 +15,7 @@ def test_rust_order(): def test_rust_style(): - """ - Test that rust code passes style checks. - """ + """Test that rust code passes style checks.""" # ../src/io_uring/src/bindings.rs config = open("fmt.toml", encoding="utf-8").read().replace("\n", ",") From 85f1a25815dbcae6e2b518f892db5030093847b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Mon, 20 Jan 2025 19:59:39 +0100 Subject: [PATCH 52/78] tests: rework gitlint_rules.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make doctests and code match. Now running `--doctest-modules` passes the doctests. Simplified the code a bit to make it easier to understand. 
Signed-off-by: Pablo Barbáchano --- tests/framework/gitlint_rules.py | 130 ++++++++++++++----------------- 1 file changed, 58 insertions(+), 72 deletions(-) diff --git a/tests/framework/gitlint_rules.py b/tests/framework/gitlint_rules.py index fad6ccb47d2..7f38cf11eab 100644 --- a/tests/framework/gitlint_rules.py +++ b/tests/framework/gitlint_rules.py @@ -2,6 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 """The user defined rules for gitlint.""" +import re + from gitlint.rules import CommitRule, RuleViolation @@ -22,16 +24,20 @@ class EndsSigned(CommitRule): def validate(self, commit): r"""Validates Signed-off-by and Co-authored-by tags as Linux's scripts/checkpatch.pl - >>> from gitlint.tests.base import BaseTestCase + >>> from gitlint.git import GitContext >>> from gitlint.rules import RuleViolation ... >>> ends_signed = EndsSigned() + >>> miss_sob_follows_coab = "Missing 'Signed-off-by' following 'Co-authored-by'" + >>> miss_sob = "'Signed-off-by' not found in commit message body" + >>> non_sign = "Non 'Co-authored-by' or 'Signed-off-by' string found following 1st 'Signed-off-by'" + >>> email_no_match = "'Co-authored-by' and 'Signed-off-by' name/email do not match" ... >>> msg1 = ( ... f"Title\n\nMessage.\n\n" ... f"Signed-off-by: name " ... ) - >>> commit1 = BaseTestCase.gitcommit(msg1) + >>> commit1 = GitContext.from_commit_msg(msg1).commits[0] >>> ends_signed.validate(commit1) [] >>> msg2 = ( @@ -39,52 +45,41 @@ def validate(self, commit): ... f"Co-authored-by: name \n\n" ... f"Signed-off-by: name " ... ) - >>> commit2 = BaseTestCase.gitcommit(msg2) + >>> commit2 = GitContext.from_commit_msg(msg2).commits[0] >>> ends_signed.validate(commit2) [] - >>> msg3 = ( - ... f"Title\n\nMessage.\n\n" - ... ) - >>> commit3 = BaseTestCase.gitcommit(msg3) + >>> msg3 = f"Title\n\nMessage.\n\n" + >>> commit3 = GitContext.from_commit_msg(msg3).commits[0] >>> vio3 = ends_signed.validate(commit3) - >>> vio_msg3 = ( - ... 
f"'Signed-off-by:' not found in commit message body" - ... ) - >>> vio3 == [RuleViolation("UC2", vio_msg3)] + >>> vio3 == [RuleViolation("UC2", miss_sob)] True >>> msg4 = ( ... f"Title\n\nMessage.\n\n" ... f"Signed-off-by: name \n\na sentence" ... ) - >>> commit4 = BaseTestCase.gitcommit(msg4) + >>> commit4 = GitContext.from_commit_msg(msg4).commits[0] >>> vio4 = ends_signed.validate(commit4) - >>> vio_msg4 = ( - ... f"Non 'Co-authored-by:' or 'Signed-off-by:' string found following 1st 'Signed-off-by:'" - ... ) - >>> vio4 == [RuleViolation("UC2", vio_msg4, None, 5)] + >>> vio4 == [RuleViolation("UC2", non_sign, None, 6)] True >>> msg5 = ( ... f"Title\n\nMessage.\n\n" ... f"Co-authored-by: name " ... ) - >>> commit5 = BaseTestCase.gitcommit(msg5) + >>> commit5 = GitContext.from_commit_msg(msg5).commits[0] >>> vio5 = ends_signed.validate(commit5) - >>> vio_msg5 = ( - ... f"Missing 'Signed-off-by:' following 'Co-authored-by:'" - ... ) - >>> vio5 == [RuleViolation("UC2", vio_msg5, None, 2)] + >>> vio5 == [ + ... RuleViolation("UC2", miss_sob, None, None), + ... RuleViolation("UC2", miss_sob_follows_coab, None, 5) + ... ] True >>> msg6 = ( ... f"Title\n\nMessage.\n\n" ... f"Co-authored-by: name \n\n" ... f"Signed-off-by: different name " ... ) - >>> commit6 = BaseTestCase.gitcommit(msg6) + >>> commit6 = GitContext.from_commit_msg(msg6).commits[0] >>> vio6 = ends_signed.validate(commit6) - >>> vio_msg6 = ( - ... f"'Co-authored-by:' and 'Signed-off-by:' name/email do not match" - ... 
) - >>> vio6 == [RuleViolation("UC2", vio_msg6, None, 6)] + >>> vio6 == [RuleViolation("UC2", email_no_match, None, 6)] True """ @@ -92,59 +87,50 @@ def validate(self, commit): # Utilities def vln(stmt, i): - return RuleViolation(self.id, stmt, None, i) - - co_auth = "Co-authored-by:" - sig = "Signed-off-by:" + violations.append(RuleViolation(self.id, stmt, None, i)) - message_iter = enumerate(commit.message.original.split("\n")) + coab = "Co-authored-by" + sob = "Signed-off-by" - # Skip ahead to the first signoff or co-author tag - - # Checks commit message contains a `Signed-off-by` string - for i, line in message_iter: - if line.startswith(sig) or line.startswith(co_auth): - break - else: - # No signature was found in the message (before `message_iter` ended) - # This check here can have false-negatives (e.g. if the body ends with only - # a 'Co-authored-by' tag), but then below will realize that the co-authored-by - # tag isnt followed by a Signed-off-by tag and fail (and also the DCO check will - # complain). - violations.append(vln(f"'{sig}' not found in commit message body", None)) - - # Check that from here on out we only have signatures and co-authors, and that - # every co-author is immediately followed by a signature with the same name/email. 
- for i, line in message_iter: - if line.startswith(co_auth): - try: - _, next_line = next(message_iter) - except StopIteration: - violations.append( - vln(f"Missing '{sig}' tag following '{co_auth}'", i) - ) - else: - if not next_line.startswith(sig): - violations.append( - vln(f"Missing '{sig}' tag following '{co_auth}'", i + 1) - ) - continue - - if next_line.split(":")[1].strip() != line.split(":")[1].strip(): - violations.append( - vln(f"{co_auth} and {sig} name/email do not match", i + 1) - ) - continue - - if line.startswith(sig) or not line.strip(): + # find trailers + trailers = [] + for i, line in enumerate(commit.message.original.splitlines()): + # ignore empty lines + if not line: continue + match = re.match(r"([\w-]+):\s+(.*)", line) + if match: + key, val = match.groups() + trailers.append((i, key, val)) + else: + trailers.append((i, "line", line)) + # artificial line so we can check any "previous line" rules + trailers.append((trailers[-1][0] + 1, None, None)) - violations.append( + # Checks commit message contains a `Signed-off-by` string + if not [x for x in trailers if x[1] == sob]: + vln(f"'{sob}' not found in commit message body", None) + + prev_trailer, prev_value = None, None + sig_trailers = False + for i, trailer, value in trailers: + if trailer in {sob, coab}: + sig_trailers = True + elif trailer not in {sob, coab, None} and sig_trailers: vln( - f"Non '{co_auth}' or '{sig}' string found following 1st '{sig}'", + f"Non '{coab}' or '{sob}' string found following 1st '{sob}'", i, ) - ) + # Every co-author is immediately followed by a signature + if prev_trailer == coab: + if trailer != sob: + vln(f"Missing '{sob}' following '{coab}'", i) + else: + # with the same name/email. 
+ if value != prev_value: + vln(f"'{coab}' and '{sob}' name/email do not match", i) + + prev_trailer, prev_value = trailer, value # Return errors return violations From dfb45dc4213bcb1c9704435457e233d3a210dce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Tue, 21 Jan 2025 09:45:16 +0100 Subject: [PATCH 53/78] checkstyle: run doctest tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run doctests tests as part of the style step Signed-off-by: Pablo Barbáchano --- tools/devtool | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/devtool b/tools/devtool index 29434bf8100..768b303e8c5 100755 --- a/tools/devtool +++ b/tools/devtool @@ -934,7 +934,8 @@ cmd_mkdocs() { } cmd_checkstyle() { - cmd_test --no-build --no-kvm-check -- integration_tests/style -n 4 --dist worksteal + cmd_test --no-build --no-kvm-check -- -n 4 --dist worksteal integration_tests/style + cmd_test --no-build --no-kvm-check -- -n 4 --doctest-modules framework } # Check if able to run firecracker. 
From bcf44d6820f64b62927d4bbf6228dc4effdb6c0b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:30:15 +0000 Subject: [PATCH 54/78] build(deps): Bump the firecracker group with 15 updates Bumps the firecracker group with 15 updates: | Package | From | To | | --- | --- | --- | | [uuid](https://github.com/uuid-rs/uuid) | `1.11.1` | `1.12.0` | | [serde_json](https://github.com/serde-rs/json) | `1.0.135` | `1.0.137` | | [micro_http](https://github.com/firecracker-microvm/micro-http) | ``8182cd5`` | ``ef96f62`` | | [log](https://github.com/rust-lang/log) | `0.4.22` | `0.4.25` | | [semver](https://github.com/dtolnay/semver) | `1.0.24` | `1.0.25` | | [aws-lc-rs](https://github.com/aws/aws-lc-rs) | `1.12.0` | `1.12.1` | | [bitflags](https://github.com/bitflags/bitflags) | `2.7.0` | `2.8.0` | | [kvm-bindings](https://github.com/rust-vmm/kvm) | `0.10.0` | `0.11.0` | | [kvm-ioctls](https://github.com/rust-vmm/kvm) | `0.19.1` | `0.20.0` | | [anstyle-wincon](https://github.com/rust-cli/anstyle) | `3.0.6` | `3.0.7` | | [aws-lc-fips-sys](https://github.com/aws/aws-lc-rs) | `0.13.0` | `0.13.1` | | [aws-lc-sys](https://github.com/aws/aws-lc-rs) | `0.24.1` | `0.25.0` | | [cc](https://github.com/rust-lang/cc-rs) | `1.2.9` | `1.2.10` | | [indexmap](https://github.com/indexmap-rs/indexmap) | `2.7.0` | `2.7.1` | | [uuid-macro-internal](https://github.com/uuid-rs/uuid) | `1.11.1` | `1.12.0` | Updates `uuid` from 1.11.1 to 1.12.0 - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.11.1...1.12.0) Updates `serde_json` from 1.0.135 to 1.0.137 - [Release notes](https://github.com/serde-rs/json/releases) - [Commits](https://github.com/serde-rs/json/compare/v1.0.135...v1.0.137) Updates `micro_http` from `8182cd5` to `ef96f62` - 
[Commits](https://github.com/firecracker-microvm/micro-http/compare/8182cd5523b63ceb52ad9d0e7eb6fb95683e6d1b...ef96f623c46e221ebf9b6037567f97ec57683afd) Updates `log` from 0.4.22 to 0.4.25 - [Release notes](https://github.com/rust-lang/log/releases) - [Changelog](https://github.com/rust-lang/log/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-lang/log/compare/0.4.22...0.4.25) Updates `semver` from 1.0.24 to 1.0.25 - [Release notes](https://github.com/dtolnay/semver/releases) - [Commits](https://github.com/dtolnay/semver/compare/1.0.24...1.0.25) Updates `aws-lc-rs` from 1.12.0 to 1.12.1 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/v1.12.0...v1.12.1) Updates `bitflags` from 2.7.0 to 2.8.0 - [Release notes](https://github.com/bitflags/bitflags/releases) - [Changelog](https://github.com/bitflags/bitflags/blob/main/CHANGELOG.md) - [Commits](https://github.com/bitflags/bitflags/compare/2.7.0...2.8.0) Updates `kvm-bindings` from 0.10.0 to 0.11.0 - [Release notes](https://github.com/rust-vmm/kvm/releases) - [Changelog](https://github.com/rust-vmm/kvm/blob/v0.11.0/CHANGELOG.md) - [Commits](https://github.com/rust-vmm/kvm/compare/v0.10.0...v0.11.0) Updates `kvm-ioctls` from 0.19.1 to 0.20.0 - [Release notes](https://github.com/rust-vmm/kvm/releases) - [Commits](https://github.com/rust-vmm/kvm/commits) Updates `anstyle-wincon` from 3.0.6 to 3.0.7 - [Commits](https://github.com/rust-cli/anstyle/compare/anstyle-wincon-v3.0.6...anstyle-wincon-v3.0.7) Updates `aws-lc-fips-sys` from 0.13.0 to 0.13.1 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/aws-lc-fips-sys/v0.13.0...aws-lc-fips-sys/v0.13.1) Updates `aws-lc-sys` from 0.24.1 to 0.25.0 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/aws-lc-sys/v0.24.1...aws-lc-sys/v0.25.0) Updates `cc` from 1.2.9 to 1.2.10 - [Release 
notes](https://github.com/rust-lang/cc-rs/releases) - [Changelog](https://github.com/rust-lang/cc-rs/blob/main/CHANGELOG.md) - [Commits](https://github.com/rust-lang/cc-rs/compare/cc-v1.2.9...cc-v1.2.10) Updates `indexmap` from 2.7.0 to 2.7.1 - [Changelog](https://github.com/indexmap-rs/indexmap/blob/master/RELEASES.md) - [Commits](https://github.com/indexmap-rs/indexmap/compare/2.7.0...2.7.1) Updates `uuid-macro-internal` from 1.11.1 to 1.12.0 - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.11.1...1.12.0) --- updated-dependencies: - dependency-name: uuid dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: serde_json dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: micro_http dependency-type: direct:production dependency-group: firecracker - dependency-name: log dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: semver dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: aws-lc-rs dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: bitflags dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: kvm-bindings dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: kvm-ioctls dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: anstyle-wincon dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: aws-lc-fips-sys dependency-type: indirect update-type: 
version-update:semver-patch dependency-group: firecracker - dependency-name: aws-lc-sys dependency-type: indirect update-type: version-update:semver-minor dependency-group: firecracker - dependency-name: cc dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: indexmap dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: uuid-macro-internal dependency-type: indirect update-type: version-update:semver-minor dependency-group: firecracker ... Signed-off-by: dependabot[bot] --- Cargo.lock | 86 +++++++++++++++--------------- src/clippy-tracing/Cargo.toml | 2 +- src/cpu-template-helper/Cargo.toml | 2 +- src/firecracker/Cargo.toml | 4 +- src/log-instrument/Cargo.toml | 2 +- src/seccompiler/Cargo.toml | 2 +- src/snapshot-editor/Cargo.toml | 2 +- src/vmm/Cargo.toml | 14 ++--- 8 files changed, 57 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e9a4972b07..6199774c573 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,11 +103,12 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "3.0.6" +version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" dependencies = [ "anstyle", + "once_cell", "windows-sys 0.59.0", ] @@ -125,25 +126,24 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-lc-fips-sys" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59057b878509d88952425fe694a2806e468612bde2d71943f3cd8034935b5032" +checksum = "f8c7557f6c81ecd3e38582996b31a0f329900586abaae5f092e756686958f22c" dependencies = [ "bindgen 0.69.5", "cc", "cmake", "dunce", "fs_extra", - "libc", "paste", "regex", ] [[package]] name = "aws-lc-rs" 
-version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f409eb70b561706bf8abba8ca9c112729c481595893fd06a2dd9af8ed8441148" +checksum = "1ea835662a0af02443aa1396d39be523bbf8f11ee6fad20329607c480bea48c3" dependencies = [ "aws-lc-fips-sys", "aws-lc-sys", @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.24.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923ded50f602b3007e5e63e3f094c479d9c8a9b42d7f4034e4afe456aa48bfd2" +checksum = "71b2ddd3ada61a305e1d8bb6c005d1eaa7d14d903681edfc400406d523a9b491" dependencies = [ "bindgen 0.69.5", "cc", @@ -187,7 +187,7 @@ version = "0.68.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "cexpr", "clang-sys", "lazy_static", @@ -207,7 +207,7 @@ version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "cexpr", "clang-sys", "itertools 0.10.5", @@ -232,9 +232,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1be3f42a67d6d345ecd59f675f3f012d6974981560836e938c22b424b85ce1be" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "byteorder" @@ -260,9 +260,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.9" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8293772165d9345bdaaa39b45b2109591e63fe5e6fbc23c6ff930a048aa310b" +checksum = 
"13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" dependencies = [ "jobserver", "libc", @@ -584,7 +584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -632,7 +632,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31c683a9f13de31432e6097131d5f385898c7f0635c0f392b9d0fa165063c8ac" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "cfg-if", "log", "managed", @@ -732,9 +732,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "indexmap" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown", @@ -813,9 +813,9 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4933174d0cc4b77b958578cd45784071cc5ae212c2d78fbd755aaaa6dfa71a" +checksum = "501bc0717c6a9fc409f29047ebeb6040a4d304344698abb268c4c6a440e6a09a" dependencies = [ "serde", "vmm-sys-util", @@ -824,11 +824,11 @@ dependencies = [ [[package]] name = "kvm-ioctls" -version = "0.19.1" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e013ae7fcd2c6a8f384104d16afe7ea02969301ea2bb2a56e44b011ebc907cab" +checksum = "3f9120f23310f01dd7b4fbb4ae1fd4eae3e09a7aa5b77038b08a6b37099d8ef4" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "kvm-bindings", "libc", "vmm-sys-util", @@ -879,9 +879,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "log" -version = 
"0.4.22" +version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" dependencies = [ "serde", ] @@ -928,7 +928,7 @@ dependencies = [ [[package]] name = "micro_http" version = "0.1.0" -source = "git+https://github.com/firecracker-microvm/micro-http#8182cd5523b63ceb52ad9d0e7eb6fb95683e6d1b" +source = "git+https://github.com/firecracker-microvm/micro-http#ef96f623c46e221ebf9b6037567f97ec57683afd" dependencies = [ "libc", "vmm-sys-util", @@ -946,7 +946,7 @@ version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "cfg-if", "libc", ] @@ -1046,7 +1046,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "lazy_static", "num-traits", "rand", @@ -1157,11 +1157,11 @@ version = "0.38.43" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1195,9 +1195,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" dependencies = [ "serde", ] @@ -1224,9 +1224,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.137" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "930cfb6e6abf99298aaad7d29abbef7a9999a9a8806a40088f55f0dcec03146b" dependencies = [ "itoa", "memchr", @@ -1436,7 +1436,7 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18d8b176d4d3e420685e964f87c25df5fdd5b26d7eb0d0e7c892d771f5b81035" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "cfg-if", "libc", "nix", @@ -1473,9 +1473,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.11.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b913a3b5fe84142e269d63cc62b64319ccaf89b748fc31fe025177f767a756c4" +checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" dependencies = [ "getrandom", "rand", @@ -1484,9 +1484,9 @@ dependencies = [ [[package]] name = "uuid-macro-internal" -version = "1.11.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91084647266237a48351d05d55dee65bba9e1b597f555fcf54680f820284a1c" +checksum = "144b419c512fdd5eaa4c2998813e32aaab2b257746ee038de93985a99635501d" dependencies = [ "proc-macro2", "quote", @@ -1505,7 +1505,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bce0aad4d8776cb64f1ac591e908a561c50ba6adac4416296efee590b155623f" dependencies = [ - "bitflags 2.7.0", + "bitflags 2.8.0", "libc", "uuid", "vm-memory", @@ -1555,7 +1555,7 @@ dependencies = [ "aws-lc-rs", "base64", "bincode", - "bitflags 2.7.0", + "bitflags 2.8.0", "crc64", "criterion", "derive_more", @@ -1653,7 +1653,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/src/clippy-tracing/Cargo.toml 
b/src/clippy-tracing/Cargo.toml index 0594fc3e349..489a77bb918 100644 --- a/src/clippy-tracing/Cargo.toml +++ b/src/clippy-tracing/Cargo.toml @@ -18,7 +18,7 @@ syn = { version = "2.0.96", features = ["full", "extra-traits", "visit", "visit- walkdir = "2.5.0" [dev-dependencies] -uuid = { version = "1.11.1", features = ["v4"] } +uuid = { version = "1.12.0", features = ["v4"] } [lints] workspace = true diff --git a/src/cpu-template-helper/Cargo.toml b/src/cpu-template-helper/Cargo.toml index 6ff36523398..07ac4c310ba 100644 --- a/src/cpu-template-helper/Cargo.toml +++ b/src/cpu-template-helper/Cargo.toml @@ -15,7 +15,7 @@ displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } serde = { version = "1.0.217", features = ["derive"] } -serde_json = "1.0.135" +serde_json = "1.0.137" thiserror = "2.0.11" vmm = { path = "../vmm" } diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index e6af962ea20..cdb89d174d3 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -24,7 +24,7 @@ micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } serde = { version = "1.0.217", features = ["derive"] } serde_derive = "1.0.136" -serde_json = "1.0.135" +serde_json = "1.0.137" thiserror = "2.0.11" timerfd = "1.6.0" utils = { path = "../utils" } @@ -43,7 +43,7 @@ userfaultfd = "0.8.1" [build-dependencies] seccompiler = { path = "../seccompiler" } serde = { version = "1.0.217" } -serde_json = "1.0.135" +serde_json = "1.0.137" [features] tracing = ["log-instrument", "utils/tracing", "vmm/tracing"] diff --git a/src/log-instrument/Cargo.toml b/src/log-instrument/Cargo.toml index d9ea9d7dd63..ef5ffde6d7d 100644 --- a/src/log-instrument/Cargo.toml +++ b/src/log-instrument/Cargo.toml @@ -28,7 +28,7 @@ name = "five" name = "six" [dependencies] -log = "0.4.22" +log = "0.4.25" log-instrument-macros = { path = "../log-instrument-macros" } [dev-dependencies] diff --git a/src/seccompiler/Cargo.toml 
b/src/seccompiler/Cargo.toml index 9fd2c3c12ff..50b816a55d0 100644 --- a/src/seccompiler/Cargo.toml +++ b/src/seccompiler/Cargo.toml @@ -21,7 +21,7 @@ clap = { version = "4.5.23", features = ["derive", "string"] } displaydoc = "0.2.5" libc = "0.2.169" serde = { version = "1.0.217", features = ["derive"] } -serde_json = "1.0.135" +serde_json = "1.0.137" thiserror = "2.0.11" zerocopy = { version = "0.8.14" } diff --git a/src/snapshot-editor/Cargo.toml b/src/snapshot-editor/Cargo.toml index a9bd3143b86..7aaa0176142 100644 --- a/src/snapshot-editor/Cargo.toml +++ b/src/snapshot-editor/Cargo.toml @@ -16,7 +16,7 @@ displaydoc = "0.2.5" fc_utils = { package = "utils", path = "../utils" } libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } -semver = "1.0.24" +semver = "1.0.25" thiserror = "2.0.11" vmm = { path = "../vmm" } vmm-sys-util = "0.12.1" diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 4e1cf8eed73..891b19aac40 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -12,28 +12,28 @@ bench = false acpi_tables = { path = "../acpi-tables" } aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } arrayvec = { version = "0.7.6", optional = true } -aws-lc-rs = { version = "1.12.0", features = ["bindgen"] } +aws-lc-rs = { version = "1.12.1", features = ["bindgen"] } base64 = "0.22.1" bincode = "1.2.1" -bitflags = "2.7.0" +bitflags = "2.8.0" crc64 = "2.0.0" derive_more = { version = "1.0.0", default-features = false, features = ["from", "display"] } displaydoc = "0.2.5" event-manager = "0.4.0" gdbstub = { version = "0.7.3", optional = true } gdbstub_arch = { version = "0.3.1", optional = true } -kvm-bindings = { version = "0.10.0", features = ["fam-wrappers", "serde"] } -kvm-ioctls = "0.19.1" +kvm-bindings = { version = "0.11.0", features = ["fam-wrappers", "serde"] } +kvm-ioctls = "0.20.0" libc = "0.2.169" linux-loader = "0.13.0" -log = { version = "0.4.22", features = ["std", "serde"] } +log = { version = 
"0.4.25", features = ["std", "serde"] } log-instrument = { path = "../log-instrument", optional = true } memfd = "0.6.3" micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } -semver = { version = "1.0.24", features = ["serde"] } +semver = { version = "1.0.25", features = ["serde"] } serde = { version = "1.0.217", features = ["derive", "rc"] } -serde_json = "1.0.135" +serde_json = "1.0.137" slab = "0.4.7" thiserror = "2.0.11" timerfd = "1.5.0" From 38630d23bb9c0857be0acde51cd070f1bef8d241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Tue, 21 Jan 2025 11:46:32 +0100 Subject: [PATCH 55/78] devctr: workaround build failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since this is the 3rd time we have issues with git unsafe directories, just assume all directories are safe. This is acceptable since we control all our git dependencies. Signed-off-by: Pablo Barbáchano --- tools/devctr/ctr_gitconfig | 3 +-- tools/devtool | 3 ++- tools/release.sh | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/devctr/ctr_gitconfig b/tools/devctr/ctr_gitconfig index 2ebfb9fcb08..937cb21a308 100644 --- a/tools/devctr/ctr_gitconfig +++ b/tools/devctr/ctr_gitconfig @@ -6,5 +6,4 @@ # https://github.blog/2022-04-12-git-security-vulnerability-announced/ [safe] - directory = /firecracker - directory = /firecracker/.git + directory = * diff --git a/tools/devtool b/tools/devtool index 768b303e8c5..e12048cdb23 100755 --- a/tools/devtool +++ b/tools/devtool @@ -528,10 +528,11 @@ cmd_build() { function cmd_make_release { ensure_build_dir run_devctr \ - --user "$(id -u):$(id -g)" \ + --privileged \ --workdir "$CTR_FC_ROOT_DIR" \ -- \ ./tools/release.sh --libc musl --profile release --make-release + sudo chown -Rc $USER: release* } cmd_distclean() { diff --git a/tools/release.sh b/tools/release.sh index 53d23366793..cb433256932 100755 --- a/tools/release.sh +++ b/tools/release.sh @@ 
-99,6 +99,9 @@ EOF done +# workaround until we rebuild devctr +git config --global --replace-all safe.directory '*' + ARCH=$(uname -m) VERSION=$(get-firecracker-version) PROFILE_DIR=$(get-profile-dir "$PROFILE") From 11b12fa6ca9b2f38a3025461fd826063def89cca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 Jan 2025 09:35:26 +0000 Subject: [PATCH 56/78] build(deps): Bump the firecracker group across 1 directory with 8 updates Bumps the firecracker group with 5 updates in the / directory: | Package | From | To | | --- | --- | --- | | [clap](https://github.com/clap-rs/clap) | `4.5.26` | `4.5.27` | | [uuid](https://github.com/uuid-rs/uuid) | `1.12.0` | `1.12.1` | | [aws-lc-rs](https://github.com/aws/aws-lc-rs) | `1.12.1` | `1.12.2` | | [is-terminal](https://github.com/sunfishcode/is-terminal) | `0.4.13` | `0.4.14` | | [rustix](https://github.com/bytecodealliance/rustix) | `0.38.43` | `0.38.44` | Updates `clap` from 4.5.26 to 4.5.27 - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.26...clap_complete-v4.5.27) Updates `uuid` from 1.12.0 to 1.12.1 - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.12.0...1.12.1) Updates `aws-lc-rs` from 1.12.1 to 1.12.2 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/v1.12.1...v1.12.2) Updates `clap_builder` from 4.5.26 to 4.5.27 - [Release notes](https://github.com/clap-rs/clap/releases) - [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md) - [Commits](https://github.com/clap-rs/clap/compare/v4.5.26...v4.5.27) Updates `is-terminal` from 0.4.13 to 0.4.14 - [Commits](https://github.com/sunfishcode/is-terminal/compare/v0.4.13...v0.4.14) Updates `rustix` from 0.38.43 to 0.38.44 - 
[Release notes](https://github.com/bytecodealliance/rustix/releases) - [Changelog](https://github.com/bytecodealliance/rustix/blob/main/CHANGELOG.md) - [Commits](https://github.com/bytecodealliance/rustix/compare/v0.38.43...v0.38.44) Updates `uuid-macro-internal` from 1.12.0 to 1.12.1 - [Release notes](https://github.com/uuid-rs/uuid/releases) - [Commits](https://github.com/uuid-rs/uuid/compare/1.12.0...1.12.1) Updates `windows-sys` from 0.52.0 to 0.59.0 - [Release notes](https://github.com/microsoft/windows-rs/releases) - [Commits](https://github.com/microsoft/windows-rs/compare/0.52.0...0.59.0) --- updated-dependencies: - dependency-name: clap dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: uuid dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: aws-lc-rs dependency-type: direct:production update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: clap_builder dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: is-terminal dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: rustix dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: uuid-macro-internal dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: windows-sys dependency-type: indirect update-type: version-update:semver-minor dependency-group: firecracker ... 
Signed-off-by: dependabot[bot] --- Cargo.lock | 51 ++++++++++++------------------ src/clippy-tracing/Cargo.toml | 4 +-- src/cpu-template-helper/Cargo.toml | 2 +- src/seccompiler/Cargo.toml | 2 +- src/snapshot-editor/Cargo.toml | 2 +- src/vmm/Cargo.toml | 2 +- 6 files changed, 27 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6199774c573..af8793376bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -98,7 +98,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -109,7 +109,7 @@ checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" dependencies = [ "anstyle", "once_cell", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.12.1" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ea835662a0af02443aa1396d39be523bbf8f11ee6fad20329607c480bea48c3" +checksum = "4c2b7ddaa2c56a367ad27a094ad8ef4faacf8a617c2575acb2ba88949df999ca" dependencies = [ "aws-lc-fips-sys", "aws-lc-sys", @@ -334,9 +334,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.26" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" +checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" dependencies = [ "clap_builder", "clap_derive", @@ -353,9 +353,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.26" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" dependencies = [ "anstream", "anstyle", @@ 
-584,7 +584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -721,7 +721,7 @@ version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -751,13 +751,13 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +checksum = "3f187290c0ed3dfe3f7c85bedddd320949b68fc86ca0ceb71adfb05b3dc3af2a" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.52.0", + "windows-sys", ] [[package]] @@ -1153,15 +1153,15 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -1473,9 +1473,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744018581f9a3454a9e15beb8a33b017183f1e7c0cd170232a2d1453b23a51c4" +checksum = "b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ "getrandom", "rand", @@ -1484,9 +1484,9 @@ dependencies = [ [[package]] name = "uuid-macro-internal" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "144b419c512fdd5eaa4c2998813e32aaab2b257746ee038de93985a99635501d" +checksum = "f8a86d88347b61a0e17b9908a67efcc594130830bf1045653784358dd023e294" dependencies = [ "proc-macro2", "quote", @@ -1653,7 +1653,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -1662,15 +1662,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.59.0" diff --git a/src/clippy-tracing/Cargo.toml b/src/clippy-tracing/Cargo.toml index 489a77bb918..4539472c68f 100644 --- a/src/clippy-tracing/Cargo.toml +++ b/src/clippy-tracing/Cargo.toml @@ -10,7 +10,7 @@ name = "clippy-tracing" bench = false [dependencies] -clap = { version = "4.5.26", features = ["derive"] } +clap = { version = "4.5.27", features = ["derive"] } itertools = "0.14.0" proc-macro2 = { version = "1.0.93", features = ["span-locations"] } quote = "1.0.38" @@ -18,7 +18,7 @@ syn = { version = "2.0.96", features = ["full", "extra-traits", "visit", "visit- walkdir = "2.5.0" [dev-dependencies] -uuid = { version = "1.12.0", features = ["v4"] } +uuid = { version = "1.12.1", features = ["v4"] } [lints] workspace = true diff --git a/src/cpu-template-helper/Cargo.toml b/src/cpu-template-helper/Cargo.toml index 07ac4c310ba..ad6d370aafc 100644 --- a/src/cpu-template-helper/Cargo.toml +++ b/src/cpu-template-helper/Cargo.toml @@ -10,7 +10,7 @@ name = "cpu-template-helper" bench = false [dependencies] -clap = { version = "4.5.26", features = ["derive", 
"string"] } +clap = { version = "4.5.27", features = ["derive", "string"] } displaydoc = "0.2.5" libc = "0.2.169" log-instrument = { path = "../log-instrument", optional = true } diff --git a/src/seccompiler/Cargo.toml b/src/seccompiler/Cargo.toml index 50b816a55d0..86c8f2e4177 100644 --- a/src/seccompiler/Cargo.toml +++ b/src/seccompiler/Cargo.toml @@ -17,7 +17,7 @@ bench = false [dependencies] bincode = "1.2.1" -clap = { version = "4.5.23", features = ["derive", "string"] } +clap = { version = "4.5.27", features = ["derive", "string"] } displaydoc = "0.2.5" libc = "0.2.169" serde = { version = "1.0.217", features = ["derive"] } diff --git a/src/snapshot-editor/Cargo.toml b/src/snapshot-editor/Cargo.toml index 7aaa0176142..c03acf7d535 100644 --- a/src/snapshot-editor/Cargo.toml +++ b/src/snapshot-editor/Cargo.toml @@ -10,7 +10,7 @@ name = "snapshot-editor" bench = false [dependencies] -clap = { version = "4.5.26", features = ["derive", "string"] } +clap = { version = "4.5.27", features = ["derive", "string"] } displaydoc = "0.2.5" fc_utils = { package = "utils", path = "../utils" } diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 891b19aac40..4bbb146ca18 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -12,7 +12,7 @@ bench = false acpi_tables = { path = "../acpi-tables" } aes-gcm = { version = "0.10.1", default-features = false, features = ["aes"] } arrayvec = { version = "0.7.6", optional = true } -aws-lc-rs = { version = "1.12.1", features = ["bindgen"] } +aws-lc-rs = { version = "1.12.2", features = ["bindgen"] } base64 = "0.22.1" bincode = "1.2.1" bitflags = "2.8.0" From 02b890950ace7ce3d80e597e60ac5e9813b1e356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Wed, 22 Jan 2025 14:51:01 +0100 Subject: [PATCH 57/78] fix: make style checks fail if either test fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since `devtool` does not have `-e`, commands fail but don't fail 
the test run. Fixes: dfb45dc4213bcb1c9704435457e233d3a210dce2 Signed-off-by: Pablo Barbáchano --- tools/devtool | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/devtool b/tools/devtool index e12048cdb23..1e53cc6c6bb 100755 --- a/tools/devtool +++ b/tools/devtool @@ -935,8 +935,8 @@ cmd_mkdocs() { } cmd_checkstyle() { - cmd_test --no-build --no-kvm-check -- -n 4 --dist worksteal integration_tests/style - cmd_test --no-build --no-kvm-check -- -n 4 --doctest-modules framework + cmd_test --no-build --no-kvm-check -- -n 4 --dist worksteal integration_tests/style || exit 1 + cmd_test --no-build --no-kvm-check -- -n 4 --doctest-modules framework || exit 1 } # Check if able to run firecracker. From ddc2c2aa91d2e0af205c22fd0e94cac0f3fc5032 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 20 Jan 2025 12:27:06 +0000 Subject: [PATCH 58/78] doc: remove devpreview notice from hugepages docs We have decided to move hugepages support out of developer preview, despite missing support for memory ballooning. Signed-off-by: Patrick Roy --- docs/hugepages.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/hugepages.md b/docs/hugepages.md index 05b4e25b399..9102a534c09 100644 --- a/docs/hugepages.md +++ b/docs/hugepages.md @@ -1,10 +1,5 @@ # Backing Guest Memory by Huge Pages -> [!WARNING] -> -> Support is currently in **developer preview**. See -> [this section](RELEASE_POLICY.md#developer-preview-features) for more info. - Firecracker supports backing the guest memory of a VM by 2MB hugetlbfs pages. This can be enabled by setting the `huge_pages` field of `PUT` or `PATCH` requests to the `/machine-config` endpoint to `2M`. 
From 9054afc9758bcbbad7633980e0db971cd51fa6b7 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 20 Jan 2025 12:28:51 +0000 Subject: [PATCH 59/78] remove devpreview log message for hugepages they are no longer in devpreview Signed-off-by: Patrick Roy --- src/vmm/src/resources.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index d0c80789681..d4f89e3423b 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::cpu_config::templates::CustomCpuTemplate; use crate::device_manager::persist::SharedDeviceType; -use crate::logger::{info, log_dev_preview_warning}; +use crate::logger::info; use crate::mmds; use crate::mmds::data_store::{Mmds, MmdsVersion}; use crate::mmds::ns::MmdsNetworkStack; @@ -246,10 +246,6 @@ impl VmResources { &mut self, update: &MachineConfigUpdate, ) -> Result<(), MachineConfigError> { - if update.huge_pages.is_some() && update.huge_pages != Some(HugePageConfig::None) { - log_dev_preview_warning("Huge pages support", None); - } - let updated = self.machine_config.update(update)?; // The VM cannot have a memory size smaller than the target size From acf806dcfbfa33dd84be1c03dcf5e7771f855ca9 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 20 Jan 2025 12:46:36 +0000 Subject: [PATCH 60/78] allow combination of initrd and hugepages After simply giving it a try, it seems that this combination of features actually works. So remove all the error branches that disallow it, and update the negative test into a positive test. 
Signed-off-by: Patrick Roy --- docs/hugepages.md | 1 - src/vmm/src/resources.rs | 13 ------ src/vmm/src/vmm_config/boot_source.rs | 2 - src/vmm/src/vmm_config/machine_config.rs | 2 - .../performance/test_huge_pages.py | 41 +++++++------------ 5 files changed, 14 insertions(+), 45 deletions(-) diff --git a/docs/hugepages.md b/docs/hugepages.md index 9102a534c09..a6f18e06dd2 100644 --- a/docs/hugepages.md +++ b/docs/hugepages.md @@ -39,7 +39,6 @@ Currently, hugetlbfs support is mutually exclusive with the following Firecracker features: - Memory Ballooning via the [Balloon Device](./ballooning.md) -- Initrd ## FAQ diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index d4f89e3423b..2928a22c6ca 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -264,13 +264,6 @@ impl VmResources { if self.balloon.get().is_some() && updated.huge_pages != HugePageConfig::None { return Err(MachineConfigError::BalloonAndHugePages); } - - if self.boot_source.config.initrd_path.is_some() - && updated.huge_pages != HugePageConfig::None - { - return Err(MachineConfigError::InitrdAndHugePages); - } - self.machine_config = updated; Ok(()) @@ -337,12 +330,6 @@ impl VmResources { &mut self, boot_source_cfg: BootSourceConfig, ) -> Result<(), BootSourceConfigError> { - if boot_source_cfg.initrd_path.is_some() - && self.machine_config.huge_pages != HugePageConfig::None - { - return Err(BootSourceConfigError::HugePagesAndInitRd); - } - self.boot_source = BootSource { builder: Some(BootConfig::new(&boot_source_cfg)?), config: boot_source_cfg, diff --git a/src/vmm/src/vmm_config/boot_source.rs b/src/vmm/src/vmm_config/boot_source.rs index 1bfed7bad9c..d62338fc94b 100644 --- a/src/vmm/src/vmm_config/boot_source.rs +++ b/src/vmm/src/vmm_config/boot_source.rs @@ -42,8 +42,6 @@ pub enum BootSourceConfigError { InvalidInitrdPath(io::Error), /// The kernel command line is invalid: {0} InvalidKernelCommandLine(String), - /// Firecracker's huge pages support is incompatible 
with initrds. - HugePagesAndInitRd, } /// Holds the kernel specification (both configuration as well as runtime details). diff --git a/src/vmm/src/vmm_config/machine_config.rs b/src/vmm/src/vmm_config/machine_config.rs index 092179f5ff5..0a140ea890b 100644 --- a/src/vmm/src/vmm_config/machine_config.rs +++ b/src/vmm/src/vmm_config/machine_config.rs @@ -31,8 +31,6 @@ pub enum MachineConfigError { KernelVersion, /// Firecracker's huge pages support is incompatible with memory ballooning. BalloonAndHugePages, - /// Firecracker's huge pages support is incompatible with initrds. - InitrdAndHugePages, } /// Describes the possible (huge)page configurations for a microVM's memory. diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py index 8437d78c7d3..2aa45160836 100644 --- a/tests/integration_tests/performance/test_huge_pages.py +++ b/tests/integration_tests/performance/test_huge_pages.py @@ -7,9 +7,10 @@ import pytest from framework import utils -from framework.microvm import HugePagesConfig +from framework.microvm import HugePagesConfig, Serial from framework.properties import global_props from framework.utils_ftrace import ftrace_events +from integration_tests.functional.test_initrd import INITRD_FILESYSTEM from integration_tests.functional.test_uffd import SOCKET_PATH, spawn_pf_handler @@ -259,33 +260,19 @@ def test_negative_huge_pages_plus_initrd(uvm_with_initrd): uvm_with_initrd.spawn() uvm_with_initrd.memory_monitor = None - # Ensure setting huge pages and then telling FC to boot an initrd does not work - with pytest.raises( - RuntimeError, - match="Boot source error: Firecracker's huge pages support is incompatible with initrds.", - ): - # `basic_config` first does a PUT to /machine-config, which will apply the huge pages configuration, - # and then a PUT to /boot-source, which will register the initrd - uvm_with_initrd.basic_config( - boot_args="console=ttyS0 reboot=k panic=1 pci=off", - 
use_initrd=True, - huge_pages=HugePagesConfig.HUGETLBFS_2MB, - add_root_device=False, - vcpu_count=1, - ) - - # Ensure telling FC about the initrd first and then setting huge pages doesn't work - # This first does a PUT to /machine-config to reset the huge pages configuration, before doing a - # PUT to /boot-source to register the initrd + # `basic_config` first does a PUT to /machine-config, which will apply the huge pages configuration, + # and then a PUT to /boot-source, which will register the initrd uvm_with_initrd.basic_config( - huge_pages=HugePagesConfig.NONE, boot_args="console=ttyS0 reboot=k panic=1 pci=off", use_initrd=True, + huge_pages=HugePagesConfig.HUGETLBFS_2MB, + add_root_device=False, + vcpu_count=1, ) - with pytest.raises( - RuntimeError, - match="Machine config error: Firecracker's huge pages support is incompatible with initrds.", - ): - uvm_with_initrd.api.machine_config.patch( - huge_pages=HugePagesConfig.HUGETLBFS_2MB - ) + + uvm_with_initrd.start() + serial = Serial(uvm_with_initrd) + serial.open() + serial.rx(token="# ") + serial.tx("mount |grep rootfs") + serial.rx(token=f"rootfs on / type {INITRD_FILESYSTEM}") From aeb17549873d70eb3d852174b07f2cf8b6593d3a Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 20 Jan 2025 12:55:44 +0000 Subject: [PATCH 61/78] test: generalize initrd test Run it with all supported guest kernels, and handle the huge page configuration via pytest parametrization, so that we do not have the same test twice. 
Signed-off-by: Patrick Roy --- tests/conftest.py | 14 ---------- .../performance/test_huge_pages.py | 27 +------------------ .../test_initrd.py | 19 +++++++++++-- 3 files changed, 18 insertions(+), 42 deletions(-) rename tests/integration_tests/{functional => performance}/test_initrd.py (56%) diff --git a/tests/conftest.py b/tests/conftest.py index 8c81714f716..41e0fbf2721 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -498,20 +498,6 @@ def uvm_plain_debug(microvm_factory, guest_kernel_6_1_debug, rootfs_rw): return microvm_factory.build(guest_kernel_6_1_debug, rootfs_rw) -@pytest.fixture -def uvm_with_initrd( - microvm_factory, guest_kernel_linux_5_10, record_property, artifact_dir -): - """ - See file:../docs/initrd.md - """ - fs = artifact_dir / "initramfs.cpio" - record_property("rootfs", fs.name) - uvm = microvm_factory.build(guest_kernel_linux_5_10) - uvm.initrd_file = fs - yield uvm - - @pytest.fixture def vcpu_count(): """Return default vcpu_count. Use indirect parametrization to override.""" diff --git a/tests/integration_tests/performance/test_huge_pages.py b/tests/integration_tests/performance/test_huge_pages.py index 2aa45160836..51b5dd57418 100644 --- a/tests/integration_tests/performance/test_huge_pages.py +++ b/tests/integration_tests/performance/test_huge_pages.py @@ -7,10 +7,9 @@ import pytest from framework import utils -from framework.microvm import HugePagesConfig, Serial +from framework.microvm import HugePagesConfig from framework.properties import global_props from framework.utils_ftrace import ftrace_events -from integration_tests.functional.test_initrd import INITRD_FILESYSTEM from integration_tests.functional.test_uffd import SOCKET_PATH, spawn_pf_handler @@ -252,27 +251,3 @@ def test_negative_huge_pages_plus_balloon(uvm_plain): match="Machine config error: Firecracker's huge pages support is incompatible with memory ballooning.", ): uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB) - - -def 
test_negative_huge_pages_plus_initrd(uvm_with_initrd): - """Tests that huge pages and initrd cannot be used together""" - uvm_with_initrd.jailer.daemonize = False - uvm_with_initrd.spawn() - uvm_with_initrd.memory_monitor = None - - # `basic_config` first does a PUT to /machine-config, which will apply the huge pages configuration, - # and then a PUT to /boot-source, which will register the initrd - uvm_with_initrd.basic_config( - boot_args="console=ttyS0 reboot=k panic=1 pci=off", - use_initrd=True, - huge_pages=HugePagesConfig.HUGETLBFS_2MB, - add_root_device=False, - vcpu_count=1, - ) - - uvm_with_initrd.start() - serial = Serial(uvm_with_initrd) - serial.open() - serial.rx(token="# ") - serial.tx("mount |grep rootfs") - serial.rx(token=f"rootfs on / type {INITRD_FILESYSTEM}") diff --git a/tests/integration_tests/functional/test_initrd.py b/tests/integration_tests/performance/test_initrd.py similarity index 56% rename from tests/integration_tests/functional/test_initrd.py rename to tests/integration_tests/performance/test_initrd.py index 3c48ecd63a5..3845e5610c0 100644 --- a/tests/integration_tests/functional/test_initrd.py +++ b/tests/integration_tests/performance/test_initrd.py @@ -1,13 +1,27 @@ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for initrd.""" +import pytest -from framework.microvm import Serial +from framework.microvm import HugePagesConfig, Serial INITRD_FILESYSTEM = "rootfs" -def test_microvm_initrd_with_serial(uvm_with_initrd): +@pytest.fixture +def uvm_with_initrd(microvm_factory, guest_kernel, record_property, artifact_dir): + """ + See file:../docs/initrd.md + """ + fs = artifact_dir / "initramfs.cpio" + record_property("rootfs", fs.name) + uvm = microvm_factory.build(guest_kernel) + uvm.initrd_file = fs + yield uvm + + +@pytest.mark.parametrize("huge_pages", HugePagesConfig) +def test_microvm_initrd_with_serial(uvm_with_initrd, huge_pages): """ Test that a boot using initrd successfully loads the root filesystem. """ @@ -21,6 +35,7 @@ def test_microvm_initrd_with_serial(uvm_with_initrd): vcpu_count=1, boot_args="console=ttyS0 reboot=k panic=1 pci=off", use_initrd=True, + huge_pages=huge_pages, ) vm.start() From d28ef60fdd59570a61498c4e218498dc72643d00 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 20 Jan 2025 06:44:57 +0000 Subject: [PATCH 62/78] fix: clarify comments in kani module QUEUE_END is indeed the address of the first byte past the end of our queue, but the queue doesn't start at 0 (it starts at QUEUE_BASE_ADDRESS, which is 512), and thus GUEST_MEMORY_SIZE was 512 bytes too large, because it assumed QUEUE_END was also the size of the queue. 
Closes #4997 Reported-by: Matias Ezequiel Vara Larsen Signed-off-by: Patrick Roy --- src/vmm/src/devices/virtio/queue.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vmm/src/devices/virtio/queue.rs b/src/vmm/src/devices/virtio/queue.rs index 7f595d7d9f7..d84c4988d62 100644 --- a/src/vmm/src/devices/virtio/queue.rs +++ b/src/vmm/src/devices/virtio/queue.rs @@ -803,10 +803,11 @@ mod verification { const GUEST_MEMORY_BASE: u64 = 512; // We size our guest memory to fit a properly aligned queue, plus some wiggles bytes - // to make sure we not only test queues where all segments are consecutively aligned. + // to make sure we not only test queues where all segments are consecutively aligned (at least + // for those proofs that use a completely arbitrary queue structure). // We need to give at least 16 bytes of buffer space for the descriptor table to be // able to change its address, as it is 16-byte aligned. - const GUEST_MEMORY_SIZE: usize = QUEUE_END as usize + 30; + const GUEST_MEMORY_SIZE: usize = (QUEUE_END - QUEUE_BASE_ADDRESS) as usize + 30; fn guest_memory(memory: *mut u8) -> ProofGuestMemory { // Ideally, we'd want to do @@ -876,8 +877,7 @@ mod verification { const USED_RING_BASE_ADDRESS: u64 = AVAIL_RING_BASE_ADDRESS + 6 + 2 * FIRECRACKER_MAX_QUEUE_SIZE as u64 + 2; - /// The address of the first byte after the queue. Since our queue starts at guest physical - /// address 0, this is also the size of the memory area occupied by the queue. + /// The address of the first byte after the queue (which starts at QUEUE_BASE_ADDRESS). 
/// Note that the used ring structure has size 6 + 8 * FIRECRACKER_MAX_QUEUE_SIZE const QUEUE_END: u64 = USED_RING_BASE_ADDRESS + 6 + 8 * FIRECRACKER_MAX_QUEUE_SIZE as u64; From a2a34846616c492deb7fd894720ccdc4607360f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Barb=C3=A1chano?= Date: Thu, 23 Jan 2025 09:56:21 +0100 Subject: [PATCH 63/78] test: sync fs before checking for output file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It can happen that we check for the file before `socat` has written it in the filesystem. Sync before checking. Signed-off-by: Pablo Barbáchano --- tests/integration_tests/functional/test_net.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/integration_tests/functional/test_net.py b/tests/integration_tests/functional/test_net.py index a01157dcf76..20a40e677b0 100644 --- a/tests/integration_tests/functional/test_net.py +++ b/tests/integration_tests/functional/test_net.py @@ -115,9 +115,8 @@ def test_tap_offload(uvm_any): ) # Try to send a UDP message from host with UDP offload enabled - cmd = f"ip netns exec {vm.ssh.netns} python3 ./host_tools/udp_offload.py {vm.ssh.host} {port}" - utils.check_output(cmd) + vm.netns.check_output(f"python3 ./host_tools/udp_offload.py {vm.ssh.host} {port}") # Check that the server received the message - ret = vm.ssh.run(f"cat {out_filename}") + ret = vm.ssh.run(f"sync ; cat {out_filename}") assert ret.stdout == message, f"{ret.stdout=} {ret.stderr=}" From 88e0b6e5243331801af0b932dc42fdfa0b5853d7 Mon Sep 17 00:00:00 2001 From: Jack Thomson Date: Wed, 22 Jan 2025 12:22:14 +0000 Subject: [PATCH 64/78] Call kvmclock_ctrl on GDB pause When we pause for GDB call `KVM_KVMCLOCK_CTRL` to prevent guest softlockup watchdog panic. 
Signed-off-by: Jack Thomson --- CHANGELOG.md | 3 +++ src/vmm/src/vstate/vcpu/mod.rs | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00e82ee4b19..d6a8e1b2ded 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,9 @@ and this project adheres to `mem_size_mib` and `track_dirty_pages` being mandatory for all `PATCH /machine-config` requests. Now, they can be omitted which leaves these parts of the machine configuration unchanged. +- [#5007](https://github.com/firecracker-microvm/firecracker/pull/5007): Fixed + watchdog softlockup warning on x86_64 guests when a vCPU is paused during GDB + debugging. ## [1.10.1] diff --git a/src/vmm/src/vstate/vcpu/mod.rs b/src/vmm/src/vstate/vcpu/mod.rs index 6a6471193dc..d9aa0abd1a8 100644 --- a/src/vmm/src/vstate/vcpu/mod.rs +++ b/src/vmm/src/vstate/vcpu/mod.rs @@ -319,6 +319,16 @@ impl Vcpu { // If the emulation requests a pause lets do this #[cfg(feature = "gdb")] Ok(VcpuEmulation::Paused) => { + // Calling `KVM_KVMCLOCK_CTRL` to make sure the guest softlockup watchdog + // does not panic on resume, see https://docs.kernel.org/virt/kvm/api.html . + // We do not want to fail if the call is not successful, because depending + // that may be acceptable depending on the workload. + #[cfg(target_arch = "x86_64")] + if let Err(err) = self.kvm_vcpu.fd.kvmclock_ctrl() { + METRICS.vcpu.kvmclock_ctrl_fails.inc(); + warn!("KVM_KVMCLOCK_CTRL call failed {}", err); + } + return StateMachine::next(Self::paused); } // Emulation errors lead to vCPU exit. 
From 4b6c9b3de864bd88c2755925cb47c89f2bf4c13c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 16:50:48 +0000 Subject: [PATCH 65/78] build(deps): Bump the firecracker group with 6 updates Bumps the firecracker group with 6 updates: | Package | From | To | | --- | --- | --- | | [clap-num](https://github.com/newAM/clap-num) | `1.1.1` | `1.2.0` | | [aws-lc-fips-sys](https://github.com/aws/aws-lc-rs) | `0.13.1` | `0.13.2` | | [cpufeatures](https://github.com/RustCrypto/utils) | `0.2.16` | `0.2.17` | | [crunchy](https://github.com/eira-fransham/crunchy) | `0.2.2` | `0.2.3` | | [is-terminal](https://github.com/sunfishcode/is-terminal) | `0.4.14` | `0.4.15` | | [unicode-ident](https://github.com/dtolnay/unicode-ident) | `1.0.14` | `1.0.15` | Updates `clap-num` from 1.1.1 to 1.2.0 - [Release notes](https://github.com/newAM/clap-num/releases) - [Changelog](https://github.com/newAM/clap-num/blob/main/CHANGELOG.md) - [Commits](https://github.com/newAM/clap-num/compare/1.1.1...1.2.0) Updates `aws-lc-fips-sys` from 0.13.1 to 0.13.2 - [Release notes](https://github.com/aws/aws-lc-rs/releases) - [Commits](https://github.com/aws/aws-lc-rs/compare/aws-lc-fips-sys/v0.13.1...aws-lc-fips-sys/v0.13.2) Updates `cpufeatures` from 0.2.16 to 0.2.17 - [Commits](https://github.com/RustCrypto/utils/compare/cpufeatures-v0.2.16...cpufeatures-v0.2.17) Updates `crunchy` from 0.2.2 to 0.2.3 - [Commits](https://github.com/eira-fransham/crunchy/commits) Updates `is-terminal` from 0.4.14 to 0.4.15 - [Commits](https://github.com/sunfishcode/is-terminal/compare/v0.4.14...v0.4.15) Updates `unicode-ident` from 1.0.14 to 1.0.15 - [Release notes](https://github.com/dtolnay/unicode-ident/releases) - [Commits](https://github.com/dtolnay/unicode-ident/compare/1.0.14...1.0.15) --- updated-dependencies: - dependency-name: clap-num dependency-type: direct:production update-type: version-update:semver-minor dependency-group: firecracker - 
dependency-name: aws-lc-fips-sys dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: cpufeatures dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: crunchy dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: is-terminal dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker - dependency-name: unicode-ident dependency-type: indirect update-type: version-update:semver-patch dependency-group: firecracker ... Signed-off-by: dependabot[bot] --- Cargo.lock | 24 ++++++++++++------------ src/snapshot-editor/Cargo.toml | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index af8793376bb..de011b07a1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,9 +126,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-lc-fips-sys" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c7557f6c81ecd3e38582996b31a0f329900586abaae5f092e756686958f22c" +checksum = "33e4aa2478e3ff7e6c2624558abfdad004a57ba975974b4769a4c0d12831f143" dependencies = [ "bindgen 0.69.5", "cc", @@ -344,9 +344,9 @@ dependencies = [ [[package]] name = "clap-num" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e063d263364859dc54fb064cedb7c122740cd4733644b14b176c097f51e8ab7" +checksum = "822c4000301ac390e65995c62207501e3ef800a1fc441df913a5e8e4dc374816" dependencies = [ "num-traits", ] @@ -426,9 +426,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +checksum = 
"59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ] @@ -475,9 +475,9 @@ dependencies = [ [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "crypto-common" @@ -751,9 +751,9 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f187290c0ed3dfe3f7c85bedddd320949b68fc86ca0ceb71adfb05b3dc3af2a" +checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" dependencies = [ "hermit-abi", "libc", @@ -1404,9 +1404,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "11cd88e12b17c6494200a9c1b683a04fcac9573ed74cd1b62aeb2727c5592243" [[package]] name = "unicode-xid" diff --git a/src/snapshot-editor/Cargo.toml b/src/snapshot-editor/Cargo.toml index c03acf7d535..b75f490b7b0 100644 --- a/src/snapshot-editor/Cargo.toml +++ b/src/snapshot-editor/Cargo.toml @@ -22,7 +22,7 @@ vmm = { path = "../vmm" } vmm-sys-util = "0.12.1" [target.'cfg(target_arch = "aarch64")'.dependencies] -clap-num = "1.0.2" +clap-num = "1.2.0" [features] tracing = ["log-instrument", "fc_utils/tracing", "vmm/tracing"] From 0d2713b16212b71dab1a0ec66a3624232bdbb685 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 28 Jan 2025 12:14:04 +0000 Subject: [PATCH 66/78] fix(test): have codecov uploader exit with non-zero code on error Pass the `-Z` flag (fail on error) to the codecov uploader. 
According to the docs, without this flag upload failures will be silently ignored (why isn't this the default??) [1]. We can also see this from the logs, which contain: ``` [2025-01-28T12:10:57.973Z] ['info'] Codecov will exit with status code 0. If you are expecting a non-zero exit code, please pass in the `-Z` flag ``` The docs say that a long form `--fail-on-error` is available, but actually it doesn't seem to work, so we have to use `-Z`. [1]: https://docs.codecov.com/docs/cli-options Signed-off-by: Patrick Roy --- tests/integration_tests/build/test_coverage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/build/test_coverage.py b/tests/integration_tests/build/test_coverage.py index 9369498b377..9dbd97693f6 100644 --- a/tests/integration_tests/build/test_coverage.py +++ b/tests/integration_tests/build/test_coverage.py @@ -95,7 +95,9 @@ def test_coverage(monkeypatch): if not branch: branch = utils.check_output("git rev-parse --abbrev-ref HEAD").stdout - codecov_cmd = f"codecov -f {lcov_file} -F {global_props.host_linux_version}-{global_props.instance}" + # -Z flag means "fail on error". There's supposed to be a more descriptive long form in + # --fail-on-error, but it doesnt work. + codecov_cmd = f"codecov -Z -f {lcov_file} -F {global_props.host_linux_version}-{global_props.instance}" if pr_number and pr_number != "false": codecov_cmd += f" -P {pr_number}" From bb1edd189acca83b6202cb27e129ab9695611351 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 15:01:15 +0000 Subject: [PATCH 67/78] fix(test): validate restored test works in test_valid_handler In test_valid_handler, we had a comment to validate that the guest still works if we mess with the balloon after UFFD-based restoration. However, we didn't actually do so. Fix this by running some simple SSH command after both inflation and deflation. 
Signed-off-by: Patrick Roy --- tests/integration_tests/functional/test_uffd.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration_tests/functional/test_uffd.py b/tests/integration_tests/functional/test_uffd.py index fbb86c4c987..398529de77d 100644 --- a/tests/integration_tests/functional/test_uffd.py +++ b/tests/integration_tests/functional/test_uffd.py @@ -118,10 +118,14 @@ def test_valid_handler(uvm_plain, snapshot, uffd_handler_paths): # Inflate balloon. vm.api.balloon.patch(amount_mib=200) + # Verify if the restored guest works. + vm.ssh.check_output("true") + # Deflate balloon. vm.api.balloon.patch(amount_mib=0) # Verify if the restored guest works. + vm.ssh.check_output("true") def test_malicious_handler(uvm_plain, snapshot, uffd_handler_paths): From 9536eaa6247c844aea6194ef5b7e1111431074d9 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 15:18:49 +0000 Subject: [PATCH 68/78] fix: only use mmap trick when restoring from file When the balloon inflates, and the guest gives us back some pages of memory, we need to free those pages. In booted VMs, we do this with madvise(MADV_DONTNEED), and in restored VMs we do it by MAP_FIXED-ing a new VMA on top of the range-to-free. This is because if guest memory is a MAP_PRIVATE of a memory file, madvise has no effect. However, we also do this MAP_FIXED trick if the snapshot is restored with UFFD. In this case, it's not needed (madvise works perfectly fine), and in fact, it's wrong: If we map over the memory range, UFFD will not receive Remove events for the specified range. Fix this by only using the mmap trick for file-based restores.
Fixes #4988 Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 1 + src/vmm/src/device_manager/persist.rs | 7 ++++++- src/vmm/src/devices/virtio/balloon/device.rs | 10 +++++----- src/vmm/src/devices/virtio/balloon/persist.rs | 15 ++++++++++++--- src/vmm/src/devices/virtio/balloon/util.rs | 4 ++-- src/vmm/src/lib.rs | 2 -- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 4b43e67541f..fc23d8add0b 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -531,6 +531,7 @@ pub fn build_microvm_from_snapshot( resource_allocator: &mut vmm.resource_allocator, vm_resources, instance_id: &instance_info.id, + restored_from_file: vmm.uffd.is_none(), }; vmm.mmio_device_manager = diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 5773fa0ba09..bdf63409d68 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -214,6 +214,7 @@ pub struct MMIODevManagerConstructorArgs<'a> { pub resource_allocator: &'a mut ResourceAllocator, pub vm_resources: &'a mut VmResources, pub instance_id: &'a str, + pub restored_from_file: bool, } impl fmt::Debug for MMIODevManagerConstructorArgs<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -512,7 +513,10 @@ impl<'a> Persist<'a> for MMIODeviceManager { if let Some(balloon_state) = &state.balloon_device { let device = Arc::new(Mutex::new(Balloon::restore( - BalloonConstructorArgs { mem: mem.clone() }, + BalloonConstructorArgs { + mem: mem.clone(), + restored_from_file: constructor_args.restored_from_file, + }, &balloon_state.device_state, )?)); @@ -807,6 +811,7 @@ mod tests { resource_allocator: &mut resource_allocator, vm_resources, instance_id: "microvm-id", + restored_from_file: true, }; let restored_dev_manager = MMIODeviceManager::restore(restore_args, &device_states).unwrap(); diff --git a/src/vmm/src/devices/virtio/balloon/device.rs 
b/src/vmm/src/devices/virtio/balloon/device.rs index 697928ae9c6..f6be2536de5 100644 --- a/src/vmm/src/devices/virtio/balloon/device.rs +++ b/src/vmm/src/devices/virtio/balloon/device.rs @@ -164,7 +164,7 @@ pub struct Balloon { pub(crate) irq_trigger: IrqTrigger, // Implementation specific fields. - pub(crate) restored: bool, + pub(crate) restored_from_file: bool, pub(crate) stats_polling_interval_s: u16, pub(crate) stats_timer: TimerFd, // The index of the previous stats descriptor is saved because @@ -189,7 +189,7 @@ impl fmt::Debug for Balloon { .field("queue_evts", &self.queue_evts) .field("device_state", &self.device_state) .field("irq_trigger", &self.irq_trigger) - .field("restored", &self.restored) + .field("restored_from_file", &self.restored_from_file) .field("stats_polling_interval_s", &self.stats_polling_interval_s) .field("stats_desc_index", &self.stats_desc_index) .field("latest_stats", &self.latest_stats) @@ -204,7 +204,7 @@ impl Balloon { amount_mib: u32, deflate_on_oom: bool, stats_polling_interval_s: u16, - restored: bool, + restored_from_file: bool, ) -> Result { let mut avail_features = 1u64 << VIRTIO_F_VERSION_1; @@ -245,7 +245,7 @@ impl Balloon { irq_trigger: IrqTrigger::new().map_err(BalloonError::EventFd)?, device_state: DeviceState::Inactive, activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?, - restored, + restored_from_file, stats_polling_interval_s, stats_timer, stats_desc_index: None, @@ -355,7 +355,7 @@ impl Balloon { if let Err(err) = remove_range( mem, (guest_addr, u64::from(range_len) << VIRTIO_BALLOON_PFN_SHIFT), - self.restored, + self.restored_from_file, ) { error!("Error removing memory range: {:?}", err); } diff --git a/src/vmm/src/devices/virtio/balloon/persist.rs b/src/vmm/src/devices/virtio/balloon/persist.rs index 4e768ddd2e2..c1fb6865b5f 100644 --- a/src/vmm/src/devices/virtio/balloon/persist.rs +++ b/src/vmm/src/devices/virtio/balloon/persist.rs @@ -95,6 +95,7 @@ pub struct BalloonState { pub 
struct BalloonConstructorArgs { /// Pointer to guest memory. pub mem: GuestMemoryMmap, + pub restored_from_file: bool, } impl Persist<'_> for Balloon { @@ -121,7 +122,12 @@ impl Persist<'_> for Balloon { ) -> Result { // We can safely create the balloon with arbitrary flags and // num_pages because we will overwrite them after. - let mut balloon = Balloon::new(0, false, state.stats_polling_interval_s, true)?; + let mut balloon = Balloon::new( + 0, + false, + state.stats_polling_interval_s, + constructor_args.restored_from_file, + )?; let mut num_queues = BALLOON_NUM_QUEUES; // As per the virtio 1.1 specification, the statistics queue @@ -192,13 +198,16 @@ mod tests { // Deserialize and restore the balloon device. let restored_balloon = Balloon::restore( - BalloonConstructorArgs { mem: guest_mem }, + BalloonConstructorArgs { + mem: guest_mem, + restored_from_file: true, + }, &Snapshot::deserialize(&mut mem.as_slice()).unwrap(), ) .unwrap(); assert_eq!(restored_balloon.device_type(), TYPE_BALLOON); - assert!(restored_balloon.restored); + assert!(restored_balloon.restored_from_file); assert_eq!(restored_balloon.acked_features, balloon.acked_features); assert_eq!(restored_balloon.avail_features, balloon.avail_features); diff --git a/src/vmm/src/devices/virtio/balloon/util.rs b/src/vmm/src/devices/virtio/balloon/util.rs index f8cf7aa2000..a9960540a60 100644 --- a/src/vmm/src/devices/virtio/balloon/util.rs +++ b/src/vmm/src/devices/virtio/balloon/util.rs @@ -68,7 +68,7 @@ pub(crate) fn compact_page_frame_numbers(v: &mut [u32]) -> Vec<(u32, u32)> { pub(crate) fn remove_range( guest_memory: &GuestMemoryMmap, range: (GuestAddress, u64), - restored: bool, + restored_from_file: bool, ) -> Result<(), RemoveRegionError> { let (guest_address, range_len) = range; @@ -83,7 +83,7 @@ pub(crate) fn remove_range( // Mmap a new anonymous region over the present one in order to create a hole. 
// This workaround is (only) needed after resuming from a snapshot because the guest memory // is mmaped from file as private and there is no `madvise` flag that works for this case. - if restored { + if restored_from_file { // SAFETY: The address and length are known to be valid. let ret = unsafe { libc::mmap( diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 6833a3a12d2..618f5d7b6c3 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -314,8 +314,6 @@ pub struct Vmm { vm: Vm, guest_memory: GuestMemoryMmap, // Save UFFD in order to keep it open in the Firecracker process, as well. - // Since this field is never read again, we need to allow `dead_code`. - #[allow(dead_code)] uffd: Option, vcpus_handles: Vec, // Used by Vcpus and devices to initiate teardown; Vmm should never write here. From e92a7ff01aa5e298586af48b19a4533980d01b61 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 15:47:02 +0000 Subject: [PATCH 69/78] fix(example): correctly handle `remove` events in uffd example The crux of the issue was that UFFD gets blocked (all ioctls return -EAGAIN) when there are any `remove` events pending in the queue, which means during processing we not only need to look at the "head" of the queue, but also make sure there are no `remove` events in the "tail". Deal with these scenarios correctly by always greedily reading the entire queue, to ensure there's nothing pending, and only then processing things one-by-one. Please see the new code comments for intricacies with this approach.
Fixes #4990 Signed-off-by: Patrick Roy --- .../examples/uffd/fault_all_handler.rs | 2 +- src/firecracker/examples/uffd/uffd_utils.rs | 43 ++++++--- .../examples/uffd/valid_handler.rs | 89 +++++++++++++++---- 3 files changed, 106 insertions(+), 28 deletions(-) diff --git a/src/firecracker/examples/uffd/fault_all_handler.rs b/src/firecracker/examples/uffd/fault_all_handler.rs index 5e9f49a3207..cfeaa099236 100644 --- a/src/firecracker/examples/uffd/fault_all_handler.rs +++ b/src/firecracker/examples/uffd/fault_all_handler.rs @@ -36,7 +36,7 @@ fn main() { userfaultfd::Event::Pagefault { .. } => { for region in uffd_handler.mem_regions.clone() { uffd_handler - .serve_pf(region.mapping.base_host_virt_addr as _, region.mapping.size) + .serve_pf(region.mapping.base_host_virt_addr as _, region.mapping.size); } } _ => panic!("Unexpected event on userfaultfd"), diff --git a/src/firecracker/examples/uffd/uffd_utils.rs b/src/firecracker/examples/uffd/uffd_utils.rs index 8cc70ab7c21..a2f7879f591 100644 --- a/src/firecracker/examples/uffd/uffd_utils.rs +++ b/src/firecracker/examples/uffd/uffd_utils.rs @@ -116,7 +116,7 @@ impl UffdHandler { } } - pub fn serve_pf(&mut self, addr: *mut u8, len: usize) { + pub fn serve_pf(&mut self, addr: *mut u8, len: usize) -> bool { // Find the start of the page that the current faulting address belongs to. let dst = (addr as usize & !(self.page_size - 1)) as *mut libc::c_void; let fault_page_addr = dst as u64; @@ -133,14 +133,18 @@ impl UffdHandler { // event was received. 
This can be a consequence of guest reclaiming back its // memory from the host (through balloon device) Some(MemPageState::Uninitialized) | Some(MemPageState::FromFile) => { - let (start, end) = self.populate_from_file(region, fault_page_addr, len); - self.update_mem_state_mappings(start, end, MemPageState::FromFile); - return; + match self.populate_from_file(region, fault_page_addr, len) { + Some((start, end)) => { + self.update_mem_state_mappings(start, end, MemPageState::FromFile) + } + None => return false, + } + return true; } Some(MemPageState::Removed) | Some(MemPageState::Anonymous) => { let (start, end) = self.zero_out(fault_page_addr); self.update_mem_state_mappings(start, end, MemPageState::Anonymous); - return; + return true; } None => {} } @@ -152,20 +156,39 @@ impl UffdHandler { ); } - fn populate_from_file(&self, region: &MemRegion, dst: u64, len: usize) -> (u64, u64) { + fn populate_from_file(&self, region: &MemRegion, dst: u64, len: usize) -> Option<(u64, u64)> { let offset = dst - region.mapping.base_host_virt_addr; let src = self.backing_buffer as u64 + region.mapping.offset + offset; let ret = unsafe { - self.uffd - .copy(src as *const _, dst as *mut _, len, true) - .expect("Uffd copy failed") + match self.uffd.copy(src as *const _, dst as *mut _, len, true) { + Ok(value) => value, + // Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD + // queue while we're processing `pagefault` events. + // The weird cast is because the `bytes_copied` field is based on the + // `uffdio_copy->copy` field, which is a signed 64 bit integer, and if something + // goes wrong, it gets set to a -errno code. However, uffd-rs always casts this + // value to an unsigned `usize`, which scrambled the errno. 
+ Err(Error::PartiallyCopied(bytes_copied)) + if bytes_copied == 0 || bytes_copied == (-libc::EAGAIN) as usize => + { + return None + } + Err(Error::CopyFailed(errno)) + if std::io::Error::from(errno).raw_os_error().unwrap() == libc::EEXIST => + { + len + } + Err(e) => { + panic!("Uffd copy failed: {e:?}"); + } + } }; // Make sure the UFFD copied some bytes. assert!(ret > 0); - (dst, dst + len as u64) + Some((dst, dst + len as u64)) } fn zero_out(&mut self, addr: u64) -> (u64, u64) { diff --git a/src/firecracker/examples/uffd/valid_handler.rs b/src/firecracker/examples/uffd/valid_handler.rs index 6c681d932ac..936b9f517a3 100644 --- a/src/firecracker/examples/uffd/valid_handler.rs +++ b/src/firecracker/examples/uffd/valid_handler.rs @@ -26,24 +26,79 @@ fn main() { let mut runtime = Runtime::new(stream, file); runtime.install_panic_hook(); runtime.run(|uffd_handler: &mut UffdHandler| { - // Read an event from the userfaultfd. - let event = uffd_handler - .read_event() - .expect("Failed to read uffd_msg") - .expect("uffd_msg not ready"); - - // We expect to receive either a Page Fault or Removed - // event (if the balloon device is enabled). - match event { - userfaultfd::Event::Pagefault { addr, .. } => { - uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) + // !DISCLAIMER! + // When using UFFD together with the balloon device, this handler needs to deal with + // `remove` and `pagefault` events. There are multiple things to keep in mind in + // such setups: + // + // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN + // ----------------------------------------------------------------------------------- + // + // This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event + // arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the + // UFFD, and then go back to the process the pre-fetched events. 
+ // + // UFFD might receive events in not in their causal order + // ----------------------------------------------------- + // + // For example, the guest + // kernel might first respond to a balloon inflation by freeing some memory, and + // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the + // free memory range, which causes a `remove` event to be sent to UFFD. Then, the + // guest kernel might immediately fault the page in again (for example because + // default_on_oom was set). which causes a `pagefault` event to be sent to UFFD. + // + // However, the pagefault will be triggered from inside KVM on the vCPU thread, while the + // balloon device is handled by Firecracker on its VMM thread. This means that potentially + // this handler can receive the `pagefault` _before_ the `remove` event. + // + // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events + // to make sure no `remove` event is blocking us can result in the handler acting on + // the `pagefault` event before the `remove` message (despite the `remove` event being + // in the causal past of the `pagefault` event), which means that we will fault in a page + // from the snapshot file, while really we should be faulting in a zero page. + // + // In this example handler, we ignore this problem, to avoid + // complexity (under the assumption that the guest kernel will zero a newly faulted in + // page anyway). A production handler will most likely want to ensure that `remove` + // events for a specific range are always handled before `pagefault` events. + // + // Lastly, we still need to deal with the race condition where a `remove` event arrives + // in the UFFD queue after we got done reading all events, in which case we need to go + // back to reading more events before we can continue processing `pagefault`s. 
+ let mut deferred_events = Vec::new(); + + loop { + // First, try events that we couldn't handle last round + let mut events_to_handle = Vec::from_iter(deferred_events.drain(..)); + + // Read all events from the userfaultfd. + while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg") { + events_to_handle.push(event); + } + + for event in events_to_handle.drain(..) { + // We expect to receive either a Page Fault or `remove` + // event (if the balloon device is enabled). + match event { + userfaultfd::Event::Pagefault { addr, .. } => { + if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) { + deferred_events.push(event); + } + } + userfaultfd::Event::Remove { start, end } => uffd_handler + .update_mem_state_mappings(start as u64, end as u64, MemPageState::Removed), + _ => panic!("Unexpected event on userfaultfd"), + } + } + + // We assume that really only the above removed/pagefault interaction can result in + // deferred events. In that scenario, the loop will always terminate (unless + // newly arriving `remove` events end up indefinitely blocking it, but there's nothing + // we can do about that, and it's a largely theoretical problem). + if deferred_events.is_empty() { + break; } - userfaultfd::Event::Remove { start, end } => uffd_handler.update_mem_state_mappings( - start as u64, - end as u64, - MemPageState::Removed, - ), - _ => panic!("Unexpected event on userfaultfd"), } }); } From f6bd4b630a829975e9020103f613b57bf89f93d1 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 16:00:40 +0000 Subject: [PATCH 70/78] doc: Update Changelog Add entry about the UFFD fix. 
Signed-off-by: Patrick Roy --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6a8e1b2ded..30f5b70330c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,10 @@ and this project adheres to - [#5007](https://github.com/firecracker-microvm/firecracker/pull/5007): Fixed watchdog softlockup warning on x86_64 guests when a vCPU is paused during GDB debugging. +- [#5021](https://github.com/firecracker-microvm/firecracker/pull/5021) If a + balloon device is inflated post UFFD-backed snapshot restore, Firecracker now + causes `remove` UFFD messages to be sent to the UFFD handler. Previously, no + such message would be sent. ## [1.10.1] From 43247e4226c4c7a9414f35e7bdbdf976b1de15b4 Mon Sep 17 00:00:00 2001 From: Andrew Yao Date: Tue, 7 May 2024 11:25:44 -0500 Subject: [PATCH 71/78] Utilized option instead of vector to store irq lines Each MMIO device in Firecracker only utilizes at most one irq line, so capture this property at the type level. Signed-off-by: Andrew Yao Signed-off-by: Patrick Roy --- src/vmm/src/device_manager/mmio.rs | 88 ++++++++++++++---------------- 1 file changed, 41 insertions(+), 47 deletions(-) diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index 635bc1bc6e0..cab6e6d367b 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -7,6 +7,7 @@ use std::collections::HashMap; use std::fmt::Debug; +use std::num::NonZeroU32; use std::sync::{Arc, Mutex}; #[cfg(target_arch = "x86_64")] @@ -79,8 +80,8 @@ pub struct MMIODeviceInfo { pub addr: u64, /// Mmio addr range length. pub len: u64, - /// Used Irq line(s) for the device. - pub irqs: Vec, + /// Used Irq line for the device. 
+ pub irq: Option, // NOTE: guaranteed to be a value not 0, 0 is not allowed } #[cfg(target_arch = "x86_64")] @@ -150,7 +151,12 @@ impl MMIODeviceManager { resource_allocator: &mut ResourceAllocator, irq_count: u32, ) -> Result { - let irqs = resource_allocator.allocate_gsi(irq_count)?; + let irq = match resource_allocator.allocate_gsi(irq_count)?[..] { + [] => None, + [irq] => NonZeroU32::new(irq), + _ => return Err(MmioError::InvalidIrqConfig), + }; + let device_info = MMIODeviceInfo { addr: resource_allocator.allocate_mmio_memory( MMIO_LEN, @@ -158,7 +164,7 @@ impl MMIODeviceManager { AllocPolicy::FirstMatch, )?, len: MMIO_LEN, - irqs, + irq, }; Ok(device_info) } @@ -187,9 +193,9 @@ impl MMIODeviceManager { ) -> Result<(), MmioError> { // Our virtio devices are currently hardcoded to use a single IRQ. // Validate that requirement. - if device_info.irqs.len() != 1 { + let Some(irq) = device_info.irq else { return Err(MmioError::InvalidIrqConfig); - } + }; let identifier; { let locked_device = mmio_device.locked_device(); @@ -201,11 +207,8 @@ impl MMIODeviceManager { vm.register_ioevent(queue_evt, &io_addr, u32::try_from(i).unwrap()) .map_err(MmioError::RegisterIoEvent)?; } - vm.register_irqfd( - &locked_device.interrupt_trigger().irq_evt, - device_info.irqs[0], - ) - .map_err(MmioError::RegisterIrqFd)?; + vm.register_irqfd(&locked_device.interrupt_trigger().irq_evt, irq.get()) + .map_err(MmioError::RegisterIrqFd)?; } self.register_mmio_device( @@ -230,7 +233,7 @@ impl MMIODeviceManager { .add_virtio_mmio_device( device_info.len, GuestAddress(device_info.addr), - device_info.irqs[0], + device_info.irq.unwrap().get(), None, ) .map_err(MmioError::Cmdline) @@ -257,7 +260,7 @@ impl MMIODeviceManager { device_info.len, // We are sure that `irqs` has at least one element; allocate_mmio_resources makes // sure of it. 
- device_info.irqs[0], + device_info.irq.unwrap().get(), )?; } Ok(device_info) @@ -289,7 +292,7 @@ impl MMIODeviceManager { .unwrap() .serial .interrupt_evt(), - device_info.irqs[0], + device_info.irq.unwrap().get(), ) .map_err(MmioError::RegisterIrqFd)?; @@ -525,7 +528,7 @@ impl DeviceInfoForFDT for MMIODeviceInfo { self.addr } fn irq(&self) -> u32 { - self.irqs[0] + self.irq.unwrap().into() } fn length(&self) -> u64 { self.len @@ -574,11 +577,10 @@ mod tests { #[cfg(target_arch = "x86_64")] /// Gets the number of interrupts used by the devices registered. pub fn used_irqs_count(&self) -> usize { - let mut irq_number = 0; self.get_device_info() .iter() - .for_each(|(_, device_info)| irq_number += device_info.irqs.len()); - irq_number + .filter(|(_, device_info)| device_info.irq.is_some()) + .count() } } @@ -784,7 +786,10 @@ mod tests { ); assert_eq!( crate::arch::IRQ_BASE, - device_manager.id_to_dev_info[&(DeviceType::Virtio(type_id), id)].irqs[0] + device_manager.id_to_dev_info[&(DeviceType::Virtio(type_id), id)] + .irq + .unwrap() + .get() ); let id = "bar"; @@ -821,38 +826,31 @@ mod tests { } #[test] - fn test_slot_irq_allocation() { + fn test_no_irq_allocation() { let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); + let device_info = device_manager .allocate_mmio_resources(&mut resource_allocator, 0) .unwrap(); - assert_eq!(device_info.irqs.len(), 0); + assert!(device_info.irq.is_none()); + } + + #[test] + fn test_irq_allocation() { + let mut device_manager = MMIODeviceManager::new(); + let mut resource_allocator = ResourceAllocator::new().unwrap(); + let device_info = device_manager .allocate_mmio_resources(&mut resource_allocator, 1) .unwrap(); - assert_eq!(device_info.irqs[0], crate::arch::IRQ_BASE); - assert_eq!( - format!( - "{}", - device_manager - .allocate_mmio_resources( - &mut resource_allocator, - crate::arch::IRQ_MAX - crate::arch::IRQ_BASE + 1 - ) - .unwrap_err() - ), - "Failed to 
allocate requested resource: The requested resource is not available." - .to_string() - ); + assert_eq!(device_info.irq.unwrap().get(), crate::arch::IRQ_BASE); + } - let device_info = device_manager - .allocate_mmio_resources( - &mut resource_allocator, - crate::arch::IRQ_MAX - crate::arch::IRQ_BASE - 1, - ) - .unwrap(); - assert_eq!(device_info.irqs[16], crate::arch::IRQ_BASE + 17); + #[test] + fn test_allocation_failure() { + let mut device_manager = MMIODeviceManager::new(); + let mut resource_allocator = ResourceAllocator::new().unwrap(); assert_eq!( format!( "{}", @@ -860,11 +858,7 @@ mod tests { .allocate_mmio_resources(&mut resource_allocator, 2) .unwrap_err() ), - "Failed to allocate requested resource: The requested resource is not available." - .to_string() + "Invalid MMIO IRQ configuration.".to_string() ); - device_manager - .allocate_mmio_resources(&mut resource_allocator, 0) - .unwrap(); } } From 1a9a236b7e42708a14f14370c8d13e8a575495c0 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 27 Jan 2025 15:53:35 +0000 Subject: [PATCH 72/78] refactor: remove unused `set_dirty_page_tracking()` function In addition to being unused, it was also wrong, because it only updated the flag on KVM's side, but kept firecracker's tracking disabled. Signed-off-by: Patrick Roy --- src/vmm/src/lib.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 618f5d7b6c3..9be5ba53d02 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -629,18 +629,6 @@ impl Vmm { Ok(bitmap) } - /// Enables or disables KVM dirty page tracking. - pub fn set_dirty_page_tracking(&mut self, enable: bool) -> Result<(), VmmError> { - // This function _always_ results in an ioctl update. The VMM is stateless in the sense - // that it's unaware of the current dirty page tracking setting. - // The VMM's consumer will need to cache the dirty tracking setting internally. 
For - // example, if this function were to be exposed through the VMM controller, the VMM - // resources should cache the flag. - self.vm - .set_kvm_memory_regions(&self.guest_memory, enable) - .map_err(VmmError::Vm) - } - /// Updates the path of the host file backing the emulated block device with id `drive_id`. /// We update the disk image on the device and its virtio configuration. pub fn update_block_device_path( From 441f6a8bbc9b23fb73292ff30a607b7d374c3d6f Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 16:53:11 +0000 Subject: [PATCH 73/78] refactor: drop `track_dirty_pages` parameter from memory_init We already know about dirty page tracking inside that function, based on whether the memory regions have a bitmap associated with them or not. So drop passing this information in again via a parameter, which saves us quite a bit of plumbing. Suggested-by: Nikita Kalyazin Signed-off-by: Patrick Roy --- src/vmm/src/builder.rs | 9 +++------ src/vmm/src/device_manager/mmio.rs | 6 +++--- src/vmm/src/vstate/vm.rs | 27 ++++++++++++--------------- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index fc23d8add0b..27de9f6afa6 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -155,7 +155,6 @@ fn create_vmm_and_vcpus( event_manager: &mut EventManager, guest_memory: GuestMemoryMmap, uffd: Option, - track_dirty_pages: bool, vcpu_count: u8, kvm_capabilities: Vec, ) -> Result<(Vmm, Vec), StartMicrovmError> { @@ -172,7 +171,7 @@ fn create_vmm_and_vcpus( kvm.check_memory(&guest_memory) .map_err(VmmError::Kvm) .map_err(StartMicrovmError::Internal)?; - vm.memory_init(&guest_memory, track_dirty_pages) + vm.memory_init(&guest_memory) .map_err(VmmError::Vm) .map_err(StartMicrovmError::Internal)?; @@ -292,7 +291,6 @@ pub fn build_microvm_for_boot( event_manager, guest_memory, None, - vm_resources.machine_config.track_dirty_pages, vm_resources.machine_config.vcpu_count, 
cpu_template.kvm_capabilities.clone(), )?; @@ -482,7 +480,6 @@ pub fn build_microvm_from_snapshot( event_manager, guest_memory, uffd, - vm_resources.machine_config.track_dirty_pages, vm_resources.machine_config.vcpu_count, microvm_state.kvm_state.kvm_cap_modifiers.clone(), )?; @@ -1140,7 +1137,7 @@ pub(crate) mod tests { let kvm = Kvm::new(vec![]).unwrap(); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory, false).unwrap(); + vm.memory_init(&guest_memory).unwrap(); let mmio_device_manager = MMIODeviceManager::new(); let acpi_device_manager = ACPIDeviceManager::new(); #[cfg(target_arch = "x86_64")] @@ -1394,7 +1391,7 @@ pub(crate) mod tests { let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); #[allow(unused_mut)] let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_memory, false).unwrap(); + vm.memory_init(&guest_memory).unwrap(); let evfd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index cab6e6d367b..d35c8b36650 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -666,7 +666,7 @@ mod tests { let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_mem, false).unwrap(); + vm.memory_init(&guest_mem).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); @@ -696,7 +696,7 @@ mod tests { let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_mem, false).unwrap(); + vm.memory_init(&guest_mem).unwrap(); let mut device_manager = MMIODeviceManager::new(); let mut resource_allocator = ResourceAllocator::new().unwrap(); @@ -751,7 +751,7 @@ 
mod tests { let guest_mem = multi_region_mem(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]); let kvm = Kvm::new(vec![]).expect("Cannot create Kvm"); let mut vm = Vm::new(&kvm).unwrap(); - vm.memory_init(&guest_mem, false).unwrap(); + vm.memory_init(&guest_mem).unwrap(); let mem_clone = guest_mem.clone(); diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index 1bcf191b8b9..19ca0b2a76c 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -129,12 +129,8 @@ impl Vm { } /// Initializes the guest memory. - pub fn memory_init( - &self, - guest_mem: &GuestMemoryMmap, - track_dirty_pages: bool, - ) -> Result<(), VmError> { - self.set_kvm_memory_regions(guest_mem, track_dirty_pages)?; + pub fn memory_init(&self, guest_mem: &GuestMemoryMmap) -> Result<(), VmError> { + self.set_kvm_memory_regions(guest_mem)?; #[cfg(target_arch = "x86_64")] self.fd .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) @@ -146,16 +142,17 @@ impl Vm { pub(crate) fn set_kvm_memory_regions( &self, guest_mem: &GuestMemoryMmap, - track_dirty_pages: bool, ) -> Result<(), VmError> { - let mut flags = 0u32; - if track_dirty_pages { - flags |= KVM_MEM_LOG_DIRTY_PAGES; - } guest_mem .iter() .zip(0u32..) .try_for_each(|(region, slot)| { + let flags = if region.bitmap().is_some() { + KVM_MEM_LOG_DIRTY_PAGES + } else { + 0 + }; + let memory_region = kvm_userspace_memory_region { slot, guest_phys_addr: region.start_addr().raw_value(), @@ -359,7 +356,7 @@ pub(crate) mod tests { pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm, GuestMemoryMmap) { let (kvm, vm) = setup_vm(); let gm = single_region_mem(mem_size); - vm.memory_init(&gm, false).unwrap(); + vm.memory_init(&gm).unwrap(); (kvm, vm, gm) } @@ -375,7 +372,7 @@ pub(crate) mod tests { let (_, vm) = setup_vm(); // Create valid memory region and test that the initialization is successful. 
let gm = single_region_mem(0x1000); - vm.memory_init(&gm, true).unwrap(); + vm.memory_init(&gm).unwrap(); } #[cfg(target_arch = "x86_64")] @@ -452,13 +449,13 @@ pub(crate) mod tests { let (_, vm) = setup_vm(); let gm = single_region_mem(0x1000); - let res = vm.set_kvm_memory_regions(&gm, false); + let res = vm.set_kvm_memory_regions(&gm); res.unwrap(); // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE // will result in error. let gm = single_region_mem(0x10); - let res = vm.set_kvm_memory_regions(&gm, false); + let res = vm.set_kvm_memory_regions(&gm); assert_eq!( res.unwrap_err().to_string(), "Cannot set the memory regions: Invalid argument (os error 22)" From d8358055d4b2046fe86095cc11fb840f147417cf Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Mon, 27 Jan 2025 15:48:29 +0000 Subject: [PATCH 74/78] refactor: drop `offset` field from `GuestMemoryRegionState` In practice, the way we wrote our snapshot files has always been just writing all regions in-order. This means that the offset of a region is simply the sum of the sizes of the preceding regions. The new `GuestMemoryMmap::create` code already computes the offsets for mapping the memory file this way, so drop the explicit calculation at snapshot creation time (as the calculated value isn't used by the restoration anymore). Do not bump the snapshot version number, because we already did so since the last release.
Signed-off-by: Patrick Roy --- src/vmm/src/persist.rs | 9 +++++---- src/vmm/src/vstate/memory.rs | 20 ++++++-------------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index c9aadad10a9..5c352f3b260 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -584,13 +584,15 @@ fn create_guest_memory( ) -> Result<(GuestMemoryMmap, Vec), GuestMemoryFromUffdError> { let guest_memory = GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages)?; let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); - for (mem_region, state_region) in guest_memory.iter().zip(mem_state.regions.iter()) { + let mut offset = 0; + for mem_region in guest_memory.iter() { backend_mappings.push(GuestRegionUffdMapping { base_host_virt_addr: mem_region.as_ptr() as u64, size: mem_region.size(), - offset: state_region.offset, + offset, page_size_kib: huge_pages.page_size_kib(), }); + offset += mem_region.size() as u64; } Ok((guest_memory, backend_mappings)) @@ -770,7 +772,6 @@ mod tests { regions: vec![GuestMemoryRegionState { base_address: 0, size: 0x20000, - offset: 0x10000, }], }; @@ -779,7 +780,7 @@ mod tests { assert_eq!(uffd_regions.len(), 1); assert_eq!(uffd_regions[0].size, 0x20000); - assert_eq!(uffd_regions[0].offset, 0x10000); + assert_eq!(uffd_regions[0].offset, 0); assert_eq!( uffd_regions[0].page_size_kib, HugePageConfig::None.page_size_kib() diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index a84fd6c4be4..6e6674ebef7 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -122,8 +122,6 @@ pub struct GuestMemoryRegionState { pub base_address: u64, /// Region size. pub size: usize, - /// Offset in file/buffer where the region is saved. - pub offset: u64, } /// Describes guest memory regions and their snapshot file mappings. 
@@ -232,6 +230,7 @@ impl GuestMemoryExtension for GuestMemoryMmap { track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result { + let mut offset = 0; match file { Some(f) => { if huge_pages.is_hugetlbfs() { @@ -242,10 +241,12 @@ impl GuestMemoryExtension for GuestMemoryMmap { .regions .iter() .map(|r| { - f.try_clone().map(|file_clone| { - let offset = FileOffset::new(file_clone, r.offset); + let fo = f.try_clone().map(|file_clone| { + let offset = FileOffset::new(file_clone, offset); (offset, GuestAddress(r.base_address), r.size) - }) + }); + offset += r.size as u64; + fo }) .collect::, std::io::Error>>() .map_err(MemoryError::FileError)?; @@ -266,15 +267,11 @@ impl GuestMemoryExtension for GuestMemoryMmap { /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState { let mut guest_memory_state = GuestMemoryState::default(); - let mut offset = 0; self.iter().for_each(|region| { guest_memory_state.regions.push(GuestMemoryRegionState { base_address: region.start_addr().0, size: u64_to_usize(region.len()), - offset, }); - - offset += region.len(); }); guest_memory_state } @@ -536,7 +533,6 @@ mod tests { regions: vec![GuestMemoryRegionState { base_address: 0, size: 4096, - offset: 0, }], }; let file = TempFile::new().unwrap().into_file(); @@ -652,12 +648,10 @@ mod tests { GuestMemoryRegionState { base_address: 0, size: page_size, - offset: 0, }, GuestMemoryRegionState { base_address: page_size as u64 * 2, size: page_size, - offset: page_size as u64, }, ], }; @@ -679,12 +673,10 @@ mod tests { GuestMemoryRegionState { base_address: 0, size: page_size * 3, - offset: 0, }, GuestMemoryRegionState { base_address: page_size as u64 * 4, size: page_size * 3, - offset: page_size as u64 * 3, }, ], }; From b6f66fa5fd58f57e1bb75ee3de5acb41f5287c12 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Tue, 28 Jan 2025 16:00:43 +0000 Subject: [PATCH 75/78] doc: Include historic 3.0.0 snapshot bump We forgot to include this in the 1.9.0 
changelog. Let's retroactively do it. Signed-off-by: Patrick Roy --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30f5b70330c..544b5a41924 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -122,7 +122,8 @@ and this project adheres to VMGenID support for microVMs running on ARM hosts with 6.1 guest kernels. Support for VMGenID via DeviceTree bindings exists only on mainline 6.10 Linux onwards. Users of Firecracker will need to backport the relevant patches on - top of their 6.1 kernels to make use of the feature. + top of their 6.1 kernels to make use of the feature. As a result, Firecracker + snapshot version is now 3.0.0 - [#4732](https://github.com/firecracker-microvm/firecracker/pull/4732), [#4733](https://github.com/firecracker-microvm/firecracker/pull/4733), [#4741](https://github.com/firecracker-microvm/firecracker/pull/4741), From 45ad785cd0129b148782f6c58a40d4775aeddef4 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 17:33:10 +0000 Subject: [PATCH 76/78] refactor(test): Replace duplicated code with loops Some tests in memory.rs were running effectively the same test scenario twice, the only difference being the state of dirty page tracking. Just use a loop over the two boolean values here to avoid the copy-paste. Also remove a leftover test that was referring to "guard pages", but actually only repeated one of the dirty page tracking blocks. Guard pages were removed in 71cf036e56ce19dcf3144fd26bef6ae728f5464b. Signed-off-by: Patrick Roy --- src/vmm/src/vstate/memory.rs | 53 +++++++----------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 6e6674ebef7..63e4fdeadc7 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -431,8 +431,7 @@ mod tests { #[test] fn test_from_raw_regions() { - // Check dirty page tracking is off.
- { + for dirty_page_tracking in [true, false] { let region_size = 0x10000; let regions = vec![ (GuestAddress(0x0), region_size), @@ -441,27 +440,14 @@ mod tests { (GuestAddress(0x30000), region_size), ]; - let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, false, HugePageConfig::None).unwrap(); - guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_none()); - }); - } - - // Check dirty page tracking is on. - { - let region_size = 0x10000; - let regions = vec![ - (GuestAddress(0x0), region_size), - (GuestAddress(0x10000), region_size), - (GuestAddress(0x20000), region_size), - (GuestAddress(0x30000), region_size), - ]; - - let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, true, HugePageConfig::None).unwrap(); + let guest_memory = GuestMemoryMmap::from_raw_regions( + &regions, + dirty_page_tracking, + HugePageConfig::None, + ) + .unwrap(); guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_some()); + assert_eq!(region.bitmap().is_some(), dirty_page_tracking); }); } } @@ -497,32 +483,13 @@ mod tests { ), ]; - // Test that all regions are guarded. - { + for dirty_page_tracking in [true, false] { let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions.clone(), false, false).unwrap(); guest_memory.iter().for_each(|region| { assert_eq!(region.size(), region_size); assert!(region.file_offset().is_some()); - assert!(region.bitmap().is_none()); - }); - } - - // Check dirty page tracking is off. - { - let guest_memory = - GuestMemoryMmap::from_raw_regions_file(regions.clone(), false, false).unwrap(); - guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_none()); - }); - } - - // Check dirty page tracking is on.
- { - let guest_memory = - GuestMemoryMmap::from_raw_regions_file(regions, true, false).unwrap(); - guest_memory.iter().for_each(|region| { - assert!(region.bitmap().is_some()); + assert_eq!(region.bitmap().is_some(), dirty_page_tracking); }); } } From 1bb9d1846b9c808e0a51f0a9bb773d20ba95fe69 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Thu, 30 Jan 2025 17:47:40 +0000 Subject: [PATCH 77/78] refactor: centralize GuestMemoryMmap creation In this day and age, Firecracker supports theoretically 4 different ways of backing guest memory: 1. Normal MAP_ANONYMOUS | MAP_PRIVATE memory 2. memfd backed memory, mapped as shared 3. direct mapping of a snapshot file 4. MAP_ANONYMOUS again, but this time regions are described by snapshot file. We have 3 different functions for creating these different backing stores, which then call each other and vm_memory's APIs. Clean this up by consolidating these into just one function that can be called with generic memory backing options, plus 3 wrappers for the three actually used ways of backing memory. For this, hoist up the hugepages/file-based restore incompatibility check, as with a dedicated function for dealing with the "snapshot restored by mapping file" case, this function simply will not take a huge pages argument, so we have to check this somewhere else. 
Signed-off-by: Patrick Roy --- .../devices/virtio/block/vhost_user/device.rs | 12 +- .../src/devices/virtio/block/virtio/io/mod.rs | 8 +- src/vmm/src/devices/virtio/vhost_user.rs | 28 +- src/vmm/src/persist.rs | 30 +- src/vmm/src/resources.rs | 4 +- src/vmm/src/test_utils/mod.rs | 2 +- src/vmm/src/vstate/memory.rs | 311 ++++++------------ 7 files changed, 134 insertions(+), 261 deletions(-) diff --git a/src/vmm/src/devices/virtio/block/vhost_user/device.rs b/src/vmm/src/devices/virtio/block/vhost_user/device.rs index 62218157c8b..cdd17e2ea98 100644 --- a/src/vmm/src/devices/virtio/block/vhost_user/device.rs +++ b/src/vmm/src/devices/virtio/block/vhost_user/device.rs @@ -378,7 +378,7 @@ mod tests { use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::devices::virtio::mmio::VIRTIO_MMIO_INT_CONFIG; use crate::test_utils::create_tmp_socket; - use crate::vstate::memory::{FileOffset, GuestAddress, GuestMemoryExtension}; + use crate::vstate::memory::{GuestAddress, GuestMemoryExtension}; #[test] fn test_from_config() { @@ -778,12 +778,10 @@ mod tests { let region_size = 0x10000; let file = TempFile::new().unwrap().into_file(); file.set_len(region_size as u64).unwrap(); - let regions = vec![( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - )]; - let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions, false, false).unwrap(); + let regions = vec![(GuestAddress(0x0), region_size)]; + let guest_memory = + GuestMemoryMmap::create(regions.into_iter(), libc::MAP_PRIVATE, Some(file), false) + .unwrap(); // During actiavion of the device features, memory and queues should be set and activated. 
vhost_block.activate(guest_memory).unwrap(); diff --git a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs index 09e86b6968d..cc49dae3eb7 100644 --- a/src/vmm/src/devices/virtio/block/virtio/io/mod.rs +++ b/src/vmm/src/devices/virtio/block/virtio/io/mod.rs @@ -230,8 +230,12 @@ pub mod tests { } fn create_mem() -> GuestMemoryMmap { - GuestMemoryMmap::from_raw_regions(&[(GuestAddress(0), MEM_LEN)], true, HugePageConfig::None) - .unwrap() + GuestMemoryMmap::anonymous( + [(GuestAddress(0), MEM_LEN)].into_iter(), + true, + HugePageConfig::None, + ) + .unwrap() } fn check_dirty_mem(mem: &GuestMemoryMmap, addr: GuestAddress, len: u32) { diff --git a/src/vmm/src/devices/virtio/vhost_user.rs b/src/vmm/src/devices/virtio/vhost_user.rs index ad86c9942af..cca506a57c2 100644 --- a/src/vmm/src/devices/virtio/vhost_user.rs +++ b/src/vmm/src/devices/virtio/vhost_user.rs @@ -466,7 +466,7 @@ mod tests { use super::*; use crate::test_utils::create_tmp_socket; - use crate::vstate::memory::{FileOffset, GuestAddress, GuestMemoryExtension}; + use crate::vstate::memory::{GuestAddress, GuestMemoryExtension}; #[test] fn test_new() { @@ -759,19 +759,13 @@ mod tests { let file_size = 2 * region_size; file.set_len(file_size as u64).unwrap(); let regions = vec![ - ( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x10000), - GuestAddress(0x10000), - region_size, - ), + (GuestAddress(0x0), region_size), + (GuestAddress(0x10000), region_size), ]; - let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions, false, false).unwrap(); + let guest_memory = + GuestMemoryMmap::create(regions.into_iter(), libc::MAP_PRIVATE, Some(file), false) + .unwrap(); vuh.update_mem_table(&guest_memory).unwrap(); @@ -883,13 +877,11 @@ mod tests { let region_size = 0x10000; let file = TempFile::new().unwrap().into_file(); file.set_len(region_size as u64).unwrap(); - 
let regions = vec![( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - )]; + let regions = vec![(GuestAddress(0x0), region_size)]; - let guest_memory = GuestMemoryMmap::from_raw_regions_file(regions, false, false).unwrap(); + let guest_memory = + GuestMemoryMmap::create(regions.into_iter(), libc::MAP_PRIVATE, Some(file), false) + .unwrap(); let mut queue = Queue::new(69); queue.initialize(&guest_memory).unwrap(); diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index 5c352f3b260..3c5f3e7e754 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -448,16 +448,19 @@ pub fn restore_from_snapshot( let mem_state = &microvm_state.memory_state; let (guest_memory, uffd) = match params.mem_backend.backend_type { - MemBackendType::File => ( - guest_memory_from_file( - mem_backend_path, - mem_state, - track_dirty_pages, - vm_resources.machine_config.huge_pages, + MemBackendType::File => { + if vm_resources.machine_config.huge_pages.is_hugetlbfs() { + return Err(RestoreFromSnapshotGuestMemoryError::File( + GuestMemoryFromFileError::HugetlbfsSnapshot, + ) + .into()); + } + ( + guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages) + .map_err(RestoreFromSnapshotGuestMemoryError::File)?, + None, ) - .map_err(RestoreFromSnapshotGuestMemoryError::File)?, - None, - ), + } MemBackendType::Uffd => guest_memory_from_uffd( mem_backend_path, mem_state, @@ -513,17 +516,17 @@ pub enum GuestMemoryFromFileError { File(#[from] std::io::Error), /// Failed to restore guest memory: {0} Restore(#[from] MemoryError), + /// Cannot restore hugetlbfs backed snapshot by mapping the memory file. Please use uffd.
+ HugetlbfsSnapshot, } fn guest_memory_from_file( mem_file_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, - huge_pages: HugePageConfig, ) -> Result { let mem_file = File::open(mem_file_path)?; - let guest_mem = - GuestMemoryMmap::from_state(Some(&mem_file), mem_state, track_dirty_pages, huge_pages)?; + let guest_mem = GuestMemoryMmap::snapshot_file(mem_file, mem_state, track_dirty_pages)?; Ok(guest_mem) } @@ -582,7 +585,8 @@ fn create_guest_memory( track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result<(GuestMemoryMmap, Vec), GuestMemoryFromUffdError> { - let guest_memory = GuestMemoryMmap::from_state(None, mem_state, track_dirty_pages, huge_pages)?; + let guest_memory = + GuestMemoryMmap::anonymous(mem_state.regions(), track_dirty_pages, huge_pages)?; let mut backend_mappings = Vec::with_capacity(guest_memory.num_regions()); let mut offset = 0; for mem_region in guest_memory.iter() { diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 2928a22c6ca..d6c5fb31a5a 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -472,8 +472,8 @@ impl VmResources { ) } else { let regions = crate::arch::arch_memory_regions(self.machine_config.mem_size_mib << 20); - GuestMemoryMmap::from_raw_regions( - &regions, + GuestMemoryMmap::anonymous( + regions.into_iter(), self.machine_config.track_dirty_pages, self.machine_config.huge_pages, ) diff --git a/src/vmm/src/test_utils/mod.rs b/src/vmm/src/test_utils/mod.rs index 1ba79a55231..2ca7f5ce773 100644 --- a/src/vmm/src/test_utils/mod.rs +++ b/src/vmm/src/test_utils/mod.rs @@ -34,7 +34,7 @@ pub fn single_region_mem_at(at: u64, size: usize) -> GuestMemoryMmap { /// Creates a [`GuestMemoryMmap`] with multiple regions and without dirty page tracking.
pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap { - GuestMemoryMmap::from_raw_regions(regions, false, HugePageConfig::None) + GuestMemoryMmap::anonymous(regions.iter().copied(), false, HugePageConfig::None) .expect("Cannot initialize memory") } diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs index 63e4fdeadc7..a9e16a8c2e6 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs @@ -8,6 +8,7 @@ use std::fs::File; use std::io::SeekFrom; +use libc::c_int; use serde::{Deserialize, Serialize}; pub use vm_memory::bitmap::{AtomicBitmap, Bitmap, BitmapSlice, BS}; pub use vm_memory::mmap::MmapRegionBuilder; @@ -19,6 +20,7 @@ pub use vm_memory::{ use vm_memory::{Error as VmMemoryError, GuestMemoryError, WriteVolatile}; use vmm_sys_util::errno; +use crate::arch::arch_memory_regions; use crate::utils::{get_page_size, u64_to_usize}; use crate::vmm_config::machine_config::HugePageConfig; use crate::DirtyBitmap; @@ -51,8 +53,6 @@ pub enum MemoryError { Memfd(memfd::Error), /// Cannot resize memfd file: {0} MemfdSetLen(std::io::Error), - /// Cannot restore hugetlbfs backed snapshot by mapping the memory file. Please use uffd. - HugetlbfsSnapshot, } /// Defines the interface for snapshotting memory. @@ -60,35 +60,59 @@ pub trait GuestMemoryExtension where Self: Sized, { + /// Creates a [`GuestMemoryMmap`] with the given configuration + fn create( + regions: impl Iterator, + mmap_flags: libc::c_int, + file: Option, + track_dirty_pages: bool, + ) -> Result; + /// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd. fn memfd_backed( mem_size_mib: usize, track_dirty_pages: bool, huge_pages: HugePageConfig, - ) -> Result; + ) -> Result { + let memfd_file = create_memfd(mem_size_mib, huge_pages.into())?.into_file(); + let regions = arch_memory_regions(mem_size_mib << 20).into_iter(); - /// Creates a GuestMemoryMmap from raw regions. 
- fn from_raw_regions( - regions: &[(GuestAddress, usize)], - track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result; + Self::create( + regions, + libc::MAP_SHARED | huge_pages.mmap_flags(), + Some(memfd_file), + track_dirty_pages, + ) + } /// Creates a GuestMemoryMmap from raw regions. - fn from_raw_regions_file( - regions: Vec<(FileOffset, GuestAddress, usize)>, + fn anonymous( + regions: impl Iterator, track_dirty_pages: bool, - shared: bool, - ) -> Result; + huge_pages: HugePageConfig, + ) -> Result { + Self::create( + regions, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | huge_pages.mmap_flags(), + None, + track_dirty_pages, + ) + } /// Creates a GuestMemoryMmap given a `file` containing the data /// and a `state` containing mapping information. - fn from_state( - file: Option<&File>, + fn snapshot_file( + file: File, state: &GuestMemoryState, track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result; + ) -> Result { + Self::create( + state.regions(), + libc::MAP_PRIVATE, + Some(file), + track_dirty_pages, + ) + } /// Describes GuestMemoryMmap through a GuestMemoryState struct. fn describe(&self) -> GuestMemoryState; @@ -131,137 +155,51 @@ pub struct GuestMemoryState { pub regions: Vec, } -impl GuestMemoryExtension for GuestMemoryMmap { - /// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd. 
- fn memfd_backed( - mem_size_mib: usize, - track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result { - let memfd_file = create_memfd(mem_size_mib, huge_pages.into())?.into_file(); - - let mut offset: u64 = 0; - let regions = crate::arch::arch_memory_regions(mem_size_mib << 20) +impl GuestMemoryState { + /// Turns this [`GuestMemoryState`] into a description of guest memory regions as understood + /// by the creation functions of [`GuestMemoryExtensions`] + pub fn regions(&self) -> impl Iterator + '_ { + self.regions .iter() - .map(|(guest_address, region_size)| { - let file_clone = memfd_file.try_clone().map_err(MemoryError::FileError)?; - let file_offset = FileOffset::new(file_clone, offset); - offset += *region_size as u64; - Ok((file_offset, *guest_address, *region_size)) - }) - .collect::, MemoryError>>()?; - - Self::from_raw_regions_file(regions, track_dirty_pages, true) + .map(|region| (GuestAddress(region.base_address), region.size)) } +} - /// Creates a GuestMemoryMmap from raw regions backed by anonymous memory. - fn from_raw_regions( - regions: &[(GuestAddress, usize)], +impl GuestMemoryExtension for GuestMemoryMmap { + fn create( + regions: impl Iterator, + mmap_flags: c_int, + file: Option, track_dirty_pages: bool, - huge_pages: HugePageConfig, ) -> Result { - let prot = libc::PROT_READ | libc::PROT_WRITE; - // MAP_NORESERVE for 4K-backed page regions means that no swap space will be reserved for - // the region. For hugetlbfs regions, it means that pages in the hugetlbfs pool will - // not be reserved at mmap-time. This means that instead of failing at mmap-time if - // the hugetlbfs page pool is too small to accommodate the entire VM, Firecracker might - // receive a SIGBUS if a pagefault ever cannot be served due to the pool being depleted. 
- let flags = - libc::MAP_NORESERVE | libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | huge_pages.mmap_flags(); - + let mut offset = 0; let regions = regions - .iter() - .map(|(guest_address, region_size)| { - let bitmap = match track_dirty_pages { - true => Some(AtomicBitmap::with_len(*region_size)), - false => None, - }; - let region = MmapRegionBuilder::new_with_bitmap(*region_size, bitmap) - .with_mmap_prot(prot) - .with_mmap_flags(flags) - .build() - .map_err(MemoryError::MmapRegionError)?; - - GuestRegionMmap::new(region, *guest_address).map_err(MemoryError::VmMemoryError) - }) - .collect::, MemoryError>>()?; + .map(|(start, size)| { + let mut builder = MmapRegionBuilder::new_with_bitmap( + size, + track_dirty_pages.then(|| AtomicBitmap::with_len(size)), + ) + .with_mmap_prot(libc::PROT_READ | libc::PROT_WRITE) + .with_mmap_flags(libc::MAP_NORESERVE | mmap_flags); - GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) - } + if let Some(ref file) = file { + let file_offset = + FileOffset::new(file.try_clone().map_err(MemoryError::FileError)?, offset); - /// Creates a GuestMemoryMmap from raw regions backed by file. 
- fn from_raw_regions_file( - regions: Vec<(FileOffset, GuestAddress, usize)>, - track_dirty_pages: bool, - shared: bool, - ) -> Result { - let prot = libc::PROT_READ | libc::PROT_WRITE; - let flags = if shared { - libc::MAP_NORESERVE | libc::MAP_SHARED - } else { - libc::MAP_NORESERVE | libc::MAP_PRIVATE - }; - let regions = regions - .into_iter() - .map(|(file_offset, guest_address, region_size)| { - let bitmap = match track_dirty_pages { - true => Some(AtomicBitmap::with_len(region_size)), - false => None, - }; - let region = MmapRegionBuilder::new_with_bitmap(region_size, bitmap) - .with_mmap_prot(prot) - .with_mmap_flags(flags) - .with_file_offset(file_offset) - .build() - .map_err(MemoryError::MmapRegionError)?; - - GuestRegionMmap::new(region, guest_address).map_err(MemoryError::VmMemoryError) - }) - .collect::, MemoryError>>()?; + builder = builder.with_file_offset(file_offset); + } - GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) - } + offset += size as u64; - /// Creates a GuestMemoryMmap backed by a `file` if present, otherwise backed - /// by anonymous memory. Memory layout and ranges are described in `state` param. 
- fn from_state( - file: Option<&File>, - state: &GuestMemoryState, - track_dirty_pages: bool, - huge_pages: HugePageConfig, - ) -> Result { - let mut offset = 0; - match file { - Some(f) => { - if huge_pages.is_hugetlbfs() { - return Err(MemoryError::HugetlbfsSnapshot); - } + GuestRegionMmap::new( + builder.build().map_err(MemoryError::MmapRegionError)?, + start, + ) + .map_err(MemoryError::VmMemoryError) + }) + .collect::, _>>()?; - let regions = state - .regions - .iter() - .map(|r| { - let fo = f.try_clone().map(|file_clone| { - let offset = FileOffset::new(file_clone, offset); - (offset, GuestAddress(r.base_address), r.size) - }); - offset += r.size as u64; - fo - }) - .collect::, std::io::Error>>() - .map_err(MemoryError::FileError)?; - - Self::from_raw_regions_file(regions, track_dirty_pages, false) - } - None => { - let regions = state - .regions - .iter() - .map(|r| (GuestAddress(r.base_address), r.size)) - .collect::>(); - Self::from_raw_regions(&regions, track_dirty_pages, huge_pages) - } - } + GuestMemoryMmap::from_regions(regions).map_err(MemoryError::VmMemoryError) } /// Describes GuestMemoryMmap through a GuestMemoryState struct.
@@ -430,7 +368,7 @@ mod tests { use crate::utils::get_page_size; #[test] - fn test_from_raw_regions() { + fn test_anonymous() { for dirty_page_tracking in [true, false] { let region_size = 0x10000; let regions = vec![ @@ -440,8 +378,8 @@ mod tests { (GuestAddress(0x30000), region_size), ]; - let guest_memory = GuestMemoryMmap::from_raw_regions( - &regions, + let guest_memory = GuestMemoryMmap::anonymous( + regions.into_iter(), dirty_page_tracking, HugePageConfig::None, ) @@ -452,66 +390,6 @@ mod tests { } } - #[test] - fn test_from_raw_regions_file() { - let region_size = 0x10000; - - let file = TempFile::new().unwrap().into_file(); - let file_size = 4 * region_size; - file.set_len(file_size as u64).unwrap(); - - let regions = vec![ - ( - FileOffset::new(file.try_clone().unwrap(), 0x0), - GuestAddress(0x0), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x10000), - GuestAddress(0x10000), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x20000), - GuestAddress(0x20000), - region_size, - ), - ( - FileOffset::new(file.try_clone().unwrap(), 0x30000), - GuestAddress(0x30000), - region_size, - ), - ]; - - for dirty_page_tracking in [true, false] { - let guest_memory = - GuestMemoryMmap::from_raw_regions_file(regions.clone(), false, false).unwrap(); - guest_memory.iter().for_each(|region| { - assert_eq!(region.size(), region_size); - assert!(region.file_offset().is_some()); - assert_eq!(region.bitmap().is_some(), dirty_page_tracking); - }); - } - } - - #[test] - fn test_from_state() { - let state = GuestMemoryState { - regions: vec![GuestMemoryRegionState { - base_address: 0, - size: 4096, - }], - }; - let file = TempFile::new().unwrap().into_file(); - - // No mapping of snapshots that were taken with hugetlbfs enabled - let err = - GuestMemoryMmap::from_state(Some(&file), &state, false, HugePageConfig::Hugetlbfs2M) - .unwrap_err(); - - assert!(matches!(err, MemoryError::HugetlbfsSnapshot), "{:?}", err); - } - #[test] fn
test_mark_dirty() { let page_size = get_page_size().unwrap(); @@ -523,7 +401,7 @@ mod tests { (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8 ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap(); let dirty_map = [ // page 0: not dirty @@ -578,8 +456,8 @@ mod tests { let region_size = page_size * 3; // Test with a single region - let guest_memory = GuestMemoryMmap::from_raw_regions( - &[(GuestAddress(0), region_size)], + let guest_memory = GuestMemoryMmap::anonymous( + [(GuestAddress(0), region_size)].into_iter(), false, HugePageConfig::None, ) @@ -593,7 +471,7 @@ mod tests { (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8 ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap(); check_serde(&guest_memory); } @@ -607,7 +485,7 @@ mod tests { (GuestAddress(page_size as u64 * 2), page_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions[..], true, HugePageConfig::None) + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) .unwrap(); let expected_memory_state = GuestMemoryState { @@ -632,7 +510,7 @@ mod tests { (GuestAddress(page_size as u64 * 4), page_size * 3), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions[..], true, HugePageConfig::None) + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) .unwrap(); let expected_memory_state = GuestMemoryState { @@ -665,7 +543,8 @@ mod tests { (region_2_address, region_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) + .unwrap(); // Check that Firecracker bitmap is clean.
guest_memory.iter().for_each(|r| { assert!(!r.bitmap().dirty_at(0)); @@ -687,13 +566,8 @@ mod tests { let mut memory_file = TempFile::new().unwrap().into_file(); guest_memory.dump(&mut memory_file).unwrap(); - let restored_guest_memory = GuestMemoryMmap::from_state( - Some(&memory_file), - &memory_state, - false, - HugePageConfig::None, - ) - .unwrap(); + let restored_guest_memory = + GuestMemoryMmap::snapshot_file(memory_file, &memory_state, false).unwrap(); // Check that the region contents are the same. let mut restored_region = vec![0u8; page_size * 2]; @@ -721,7 +595,8 @@ mod tests { (region_2_address, region_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) + .unwrap(); // Check that Firecracker bitmap is clean. guest_memory.iter().for_each(|r| { assert!(!r.bitmap().dirty_at(0)); @@ -751,8 +626,7 @@ mod tests { // We can restore from this because this is the first dirty dump. let restored_guest_memory = - GuestMemoryMmap::from_state(Some(&file), &memory_state, false, HugePageConfig::None) - .unwrap(); + GuestMemoryMmap::snapshot_file(file, &memory_state, false).unwrap(); // Check that the region contents are the same. let mut restored_region = vec![0u8; region_size]; @@ -809,7 +683,8 @@ mod tests { (region_2_address, region_size), ]; let guest_memory = - GuestMemoryMmap::from_raw_regions(&mem_regions, true, HugePageConfig::None).unwrap(); + GuestMemoryMmap::anonymous(mem_regions.into_iter(), true, HugePageConfig::None) + .unwrap(); // Check that Firecracker bitmap is clean. 
guest_memory.iter().for_each(|r| { From 30c77f389437c3feb8b7afc9a9322541bb0cc019 Mon Sep 17 00:00:00 2001 From: Nikita Kalyazin Date: Mon, 3 Feb 2025 16:25:58 +0000 Subject: [PATCH 78/78] test(conftest): save ssh key on test failure Since ssh keys are now generated on the test machine, we need to preserve the key in test artifacts to be able to debug a test failure. Signed-off-by: Nikita Kalyazin --- tests/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 41e0fbf2721..552ba25c4ef 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ import inspect import json import os +import platform import shutil import sys import tempfile @@ -340,6 +341,7 @@ def microvm_factory(request, record_property, results_dir, netns_factory): uvm_data = results_dir / uvm.id uvm_data.mkdir() uvm_data.joinpath("host-dmesg.log").write_text(dmesg.stdout) + shutil.copy(f"/firecracker/build/img/{platform.machine()}/id_rsa", uvm_data) uvm_root = Path(uvm.chroot()) for item in os.listdir(uvm_root):