Skip to content

[24/n] [reconfigurator-planning] support no-op image source updates #8486

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

206 changes: 178 additions & 28 deletions dev-tools/reconfigurator-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! developer REPL for driving blueprint planning

use anyhow::{Context, anyhow, bail};
use camino::Utf8PathBuf;
use camino::{Utf8Path, Utf8PathBuf};
use clap::ValueEnum;
use clap::{Args, Parser, Subcommand};
use iddqd::IdOrdMap;
Expand All @@ -20,9 +20,9 @@ use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
use nexus_reconfigurator_planning::example::ExampleSystemBuilder;
use nexus_reconfigurator_planning::planner::Planner;
use nexus_reconfigurator_planning::system::{SledBuilder, SystemDescription};
use nexus_reconfigurator_simulation::SimStateBuilder;
use nexus_reconfigurator_simulation::Simulator;
use nexus_reconfigurator_simulation::{BlueprintId, SimState};
use nexus_reconfigurator_simulation::{SimStateBuilder, SimTufRepoSource};
use nexus_reconfigurator_simulation::{SimTufRepoDescription, Simulator};
use nexus_sled_agent_shared::inventory::ZoneKind;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledFilter;
Expand All @@ -39,18 +39,19 @@ use nexus_types::deployment::{OmicronZoneNic, TargetReleaseDescription};
use nexus_types::external_api::views::SledPolicy;
use nexus_types::external_api::views::SledProvisionPolicy;
use omicron_common::address::REPO_DEPOT_PORT;
use omicron_common::api::external::Generation;
use omicron_common::api::external::Name;
use omicron_common::api::external::{Generation, TufRepoDescription};
use omicron_common::policy::NEXUS_REDUNDANCY;
use omicron_common::update::OmicronZoneManifestSource;
use omicron_repl_utils::run_repl_from_file;
use omicron_repl_utils::run_repl_on_stdin;
use omicron_uuid_kinds::CollectionUuid;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::OmicronZoneUuid;
use omicron_uuid_kinds::ReconfiguratorSimUuid;
use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::VnicUuid;
use omicron_uuid_kinds::{BlueprintUuid, MupdateOverrideUuid};
use omicron_uuid_kinds::{CollectionUuid, MupdateUuid};
use std::borrow::Cow;
use std::convert::Infallible;
use std::fmt::{self, Write};
Expand Down Expand Up @@ -220,6 +221,9 @@ fn process_command(
Commands::SledRemove(args) => cmd_sled_remove(sim, args),
Commands::SledShow(args) => cmd_sled_show(sim, args),
Commands::SledSetPolicy(args) => cmd_sled_set_policy(sim, args),
Commands::SledUpdateInstallDataset(args) => {
cmd_sled_update_install_dataset(sim, args)
}
Commands::SledUpdateSp(args) => cmd_sled_update_sp(sim, args),
Commands::SiloList => cmd_silo_list(sim),
Commands::SiloAdd(args) => cmd_silo_add(sim, args),
Expand Down Expand Up @@ -275,6 +279,8 @@ enum Commands {
SledShow(SledArgs),
/// set a sled's policy
SledSetPolicy(SledSetPolicyArgs),
/// update the install dataset on a sled, simulating a mupdate
SledUpdateInstallDataset(SledUpdateInstallDatasetArgs),
/// simulate updating the sled's SP versions
SledUpdateSp(SledUpdateSpArgs),

Expand Down Expand Up @@ -395,6 +401,52 @@ impl From<SledPolicyOpt> for SledPolicy {
}
}

// Arguments for the `sled-update-install-dataset` command
// (`Commands::SledUpdateInstallDataset`).
//
// Maintainer notes in this struct use plain `//` comments on purpose: `///`
// doc comments on clap-derived items are picked up as user-visible help text.
#[derive(Debug, Args)]
struct SledUpdateInstallDatasetArgs {
/// id of the sled
sled_id: SledOpt,

// Describes where the simulated install-dataset contents come from. The
// struct is flattened, so its flags (`--from-repo`, `--to-target-release`,
// `--mupdate-id`, ...) are given directly on this command.
#[clap(flatten)]
source: SledMupdateSource,
}

#[derive(Debug, Args)]
// Describes the source of a simulated mupdate.
//
// The `requires` / `conflicts_with` attributes below all refer to the
// `sled-mupdate-valid-source` argument group declared on
// `SledMupdateValidSource`:
//
// * `--mupdate-id` and `--with-zone-error` only make sense together with a
//   valid source, so they require that group.
// * `--with-manifest-error` replaces a valid source, so it conflicts with
//   that group.
//
// Together with the group's own `multiple = false`, this makes it so that only
// one source can be specified.
struct SledMupdateSource {
// The "valid" (non-error) sources: a repo zip or the current target release.
#[clap(flatten)]
valid: SledMupdateValidSource,

// When this is not given, `mupdate_source_to_description` uses
// `OmicronZoneManifestSource::SledAgent` as the manifest source.
/// set the mupdate source to Installinator with the given ID
#[clap(long, requires = "sled-mupdate-valid-source")]
mupdate_id: Option<MupdateUuid>,

/// simulate an error reading the zone manifest
#[clap(long, conflicts_with = "sled-mupdate-valid-source")]
with_manifest_error: bool,

// May be repeated; the collected names are handed to
// `SimTufRepoSource::simulate_zone_errors`.
/// simulate an error validating zones by this artifact ID name
///
/// This uses the `artifact_id_name` representation of a zone kind.
#[clap(
long,
value_name = "ARTIFACT_ID_NAME",
requires = "sled-mupdate-valid-source"
)]
with_zone_error: Vec<String>,
}

// The non-error mupdate sources.
//
// These form the `sled-mupdate-valid-source` argument group. With
// `multiple = false`, at most one of `--from-repo` and `--to-target-release`
// may be given. The group is not marked as required, so neither may be
// present; `mupdate_source_to_description` reports an error in that case
// unless `--with-manifest-error` was passed instead.
#[derive(Debug, Args)]
#[group(id = "sled-mupdate-valid-source", multiple = false)]
struct SledMupdateValidSource {
/// the TUF repo.zip to simulate the mupdate from
#[clap(long)]
from_repo: Option<Utf8PathBuf>,

/// simulate a mupdate to the target release
#[clap(long)]
to_target_release: bool,
}

#[derive(Debug, Args)]
struct SledUpdateSpArgs {
/// id of the sled
Expand Down Expand Up @@ -879,6 +931,10 @@ struct TufAssembleArgs {
/// The tufaceous manifest path (relative to this crate's root)
manifest_path: Utf8PathBuf,

/// Allow non-semver artifact versions.
#[clap(long)]
allow_non_semver: bool,

#[clap(
long,
// Use help here rather than a doc comment because rustdoc doesn't like
Expand Down Expand Up @@ -1156,6 +1212,32 @@ fn cmd_sled_set_policy(
Ok(Some(format!("set sled {} policy to {}", sled_id, args.policy)))
}

fn cmd_sled_update_install_dataset(
sim: &mut ReconfiguratorSim,
args: SledUpdateInstallDatasetArgs,
) -> anyhow::Result<Option<String>> {
let description = mupdate_source_to_description(sim, &args.source)?;

let mut state = sim.current_state().to_mut();
let system = state.system_mut();
let sled_id = args.sled_id.to_sled_id(system.description())?;
system
.description_mut()
.sled_set_zone_manifest(sled_id, description.to_boot_inventory())?;

sim.commit_and_bump(
format!(
"reconfigurator-cli sled-update-install-dataset: {}",
description.message,
),
state,
);
Ok(Some(format!(
"sled {}: install dataset updated: {}",
sled_id, description.message
)))
}

fn cmd_sled_update_sp(
sim: &mut ReconfiguratorSim,
args: SledUpdateSpArgs,
Expand Down Expand Up @@ -1955,26 +2037,8 @@ fn cmd_set(
rv
}
SetArgs::TargetRelease { filename } => {
let file = std::fs::File::open(&filename)
.with_context(|| format!("open {:?}", filename))?;
let buf = std::io::BufReader::new(file);
let rt = tokio::runtime::Runtime::new()
.context("creating tokio runtime")?;
// We're not using the repo hash here. Make one up.
let repo_hash = ArtifactHash([0; 32]);
let artifacts_with_plan = rt.block_on(async {
ArtifactsWithPlan::from_zip(
buf,
None,
repo_hash,
ControlPlaneZonesMode::Split,
&sim.log,
)
.await
.with_context(|| format!("unpacking {:?}", filename))
})?;
let description = artifacts_with_plan.description().clone();
drop(artifacts_with_plan);
let description =
extract_tuf_repo_description(&sim.log, &filename)?;
state.system_mut().description_mut().set_target_release(
TargetReleaseDescription::TufRepo(description),
);
Expand All @@ -1986,6 +2050,84 @@ fn cmd_set(
Ok(Some(rv))
}

/// Builds the simulated TUF repo description for a mupdate source.
///
/// The source is chosen in this order:
///
/// 1. `--from-repo`: the description is extracted from the given repo zip.
/// 2. `--to-target-release`: the simulator's current target release is used;
///    this is an error if no target release has been set.
/// 3. `--with-manifest-error`: an error description is returned, simulating a
///    failure to obtain the zone manifest.
///
/// If none of these is set, an error is returned.
///
/// For the first two sources, the manifest source is Installinator with the
/// given mupdate ID when one was provided and sled-agent otherwise, and any
/// requested per-zone errors are applied to the result.
fn mupdate_source_to_description(
    sim: &ReconfiguratorSim,
    source: &SledMupdateSource,
) -> anyhow::Result<SimTufRepoDescription> {
    let manifest_source = source.mupdate_id.map_or(
        OmicronZoneManifestSource::SledAgent,
        |mupdate_id| OmicronZoneManifestSource::Installinator { mupdate_id },
    );

    // Work out the repo description and a human-readable note about where it
    // came from. The two error-ish sources return early.
    let (repo_description, origin) =
        if let Some(repo_path) = &source.valid.from_repo {
            let extracted = extract_tuf_repo_description(&sim.log, repo_path)?;
            (extracted, format!("from repo at {repo_path}"))
        } else if source.valid.to_target_release {
            let target = sim
                .current_state()
                .system()
                .description()
                .target_release()
                .description();
            let TargetReleaseDescription::TufRepo(desc) = target else {
                bail!(
                    "cannot mupdate zones without a target release \
                     (use `set target-release` or --from-repo)"
                )
            };
            (desc.clone(), "to target release".to_owned())
        } else if source.with_manifest_error {
            return Ok(SimTufRepoDescription::new_error(
                "simulated error obtaining zone manifest".to_owned(),
            ));
        } else {
            bail!("an update source must be specified")
        };

    // Shared tail for both valid sources.
    let mut sim_source =
        SimTufRepoSource::new(repo_description, manifest_source, origin)?;
    sim_source.simulate_zone_errors(&source.with_zone_error)?;
    Ok(SimTufRepoDescription::new(sim_source))
}

fn extract_tuf_repo_description(
log: &slog::Logger,
filename: &Utf8Path,
) -> anyhow::Result<TufRepoDescription> {
let file = std::fs::File::open(filename)
.with_context(|| format!("open {:?}", filename))?;
let buf = std::io::BufReader::new(file);
let rt =
tokio::runtime::Runtime::new().context("creating tokio runtime")?;
let repo_hash = ArtifactHash([0; 32]);
let artifacts_with_plan = rt.block_on(async {
ArtifactsWithPlan::from_zip(
buf,
None,
repo_hash,
ControlPlaneZonesMode::Split,
log,
)
.await
.with_context(|| format!("unpacking {:?}", filename))
})?;
let description = artifacts_with_plan.description().clone();
Ok(description)
}

fn cmd_tuf_assemble(
sim: &ReconfiguratorSim,
args: TufAssembleArgs,
Expand Down Expand Up @@ -2016,18 +2158,26 @@ fn cmd_tuf_assemble(
Utf8PathBuf::from(format!("repo-{}.zip", manifest.system_version))
};

if output_path.exists() {
bail!("output path `{output_path}` already exists");
}

// Just use a fixed key for now.
//
// In the future we may want to test changing the TUF key.
let args = tufaceous::Args::try_parse_from([
let mut tufaceous_args = vec![
"tufaceous",
"--key",
DEFAULT_TUFACEOUS_KEY,
"assemble",
manifest_path.as_str(),
output_path.as_str(),
])
.expect("args are valid so this shouldn't fail");
];
if args.allow_non_semver {
tufaceous_args.push("--allow-non-semver");
}
let args = tufaceous::Args::try_parse_from(tufaceous_args)
.expect("args are valid so this shouldn't fail");
let rt =
tokio::runtime::Runtime::new().context("creating tokio runtime")?;
rt.block_on(async move { args.exec(&sim.log).await })
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Load an example system. The sled with serial5 is marked non-provisionable
# so that discretionary zones don't make their way onto it. (We're going to
# expunge it below to test that we don't try to update zone image sources
# on expunged sleds.)
load-example --nsleds 6 --ndisks-per-sled 1 --sled-policy 5:non-provisionable

sled-list

# Create a TUF repository from a fake manifest. (The output TUF repo is
# written to a temporary directory that this invocation of `reconfigurator-cli`
# is running out of as its working directory.)
tuf-assemble ../../update-common/manifests/fake.toml
# Create a second TUF repository from a different fake manifest.
tuf-assemble ../../update-common/manifests/fake-non-semver.toml --allow-non-semver

# Load the target release from the first TUF repository.
set target-release repo-1.0.0.zip

# On one sled, update the install dataset.
sled-update-install-dataset serial0 --to-target-release

# On another sled, simulate an error reading the zone manifest.
sled-update-install-dataset serial1 --with-manifest-error

# On a third sled, update the install dataset and simulate a mupdate override.
# (Currently we do this in the blueprint, but with
# https://github.com/oxidecomputer/omicron/pull/8456 we should update this test and
# set a mupdate-override on the sled directly.)
sled-update-install-dataset serial2 --to-target-release
blueprint-edit latest set-remove-mupdate-override serial2 ffffffff-ffff-ffff-ffff-ffffffffffff

# On a fourth sled, simulate an error validating the install dataset image on one zone.
# We pick ntp because internal-ntp is non-discretionary.
sled-update-install-dataset serial3 --to-target-release --with-zone-error ntp

# On a fifth sled, set the install dataset from repo-2.0.0.zip, the second TUF
# repository assembled above.
sled-update-install-dataset serial4 --from-repo repo-2.0.0.zip

# On the sixth sled, update to the target release (so it shows up in inventory).
# Then, mark the sled expunged (in the planning input).
sled-update-install-dataset serial5 --to-target-release
sled-set-policy serial5 expunged

# Generate an inventory and run a blueprint planning step.
inventory-generate
blueprint-plan latest eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51

# This diff should show expected changes to the blueprint.
blueprint-diff 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 latest
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configu
> # Try to plan a new blueprint; this should be okay even though the sled
> # we added has no disks.
> blueprint-plan dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51
INFO skipping noop image source check for all sleds (no current TUF repo)
INFO skipping sled (no zpools in service), sled_id: 00320471-945d-413c-85e7-03e091a70b3c
INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0
INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1
Expand Down
2 changes: 2 additions & 0 deletions dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ T ENA ID PARENT
* yes ade5749d-bdf3-4fab-a8ae-00bea01b3a5a 02697f74-b14a-4418-90f0-c28b2a3a6aa9 <REDACTED_TIMESTAMP>

> blueprint-plan ade5749d-bdf3-4fab-a8ae-00bea01b3a5a
INFO skipping noop image source check for all sleds (no current TUF repo)
INFO found sled missing NTP zone (will add one), sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b
INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0
WARN failed to place all new desired Clickhouse zones, placed: 0, wanted_to_place: 1
Expand Down Expand Up @@ -936,6 +937,7 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9
> # Plan a blueprint run -- this will cause zones and disks on the expunged
> # sled to be expunged.
> blueprint-plan latest
INFO skipping noop image source check for all sleds (no current TUF repo)
INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0
INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1
INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0
Expand Down
Loading
Loading