Skip to content

[22/n] [reconfigurator-cli] specify sled by serial number #8489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 96 additions & 34 deletions dev-tools/reconfigurator-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::VnicUuid;
use omicron_uuid_kinds::{BlueprintUuid, MupdateOverrideUuid};
use std::borrow::Cow;
use std::convert::Infallible;
use std::fmt::{self, Write};
use std::io::IsTerminal;
use std::num::ParseIntError;
Expand Down Expand Up @@ -341,7 +342,7 @@ struct SledAddArgs {
#[derive(Debug, Args)]
struct SledArgs {
/// id of the sled
sled_id: SledUuid,
sled_id: SledOpt,

/// Filter to match sled ID against
#[clap(short = 'F', long, value_enum, default_value_t = SledFilter::Commissioned)]
Expand All @@ -351,7 +352,7 @@ struct SledArgs {
#[derive(Debug, Args)]
struct SledSetPolicyArgs {
/// id of the sled
sled_id: SledUuid,
sled_id: SledOpt,

/// The policy to set for the sled
#[clap(value_enum)]
Expand Down Expand Up @@ -392,7 +393,7 @@ impl From<SledPolicyOpt> for SledPolicy {
#[derive(Debug, Args)]
struct SledUpdateSpArgs {
/// id of the sled
sled_id: SledUuid,
sled_id: SledOpt,

/// sets the version reported for the SP active slot
#[clap(long, required_unless_present_any = &["inactive"])]
Expand All @@ -406,7 +407,7 @@ struct SledUpdateSpArgs {
#[derive(Debug, Args)]
struct SledRemoveArgs {
/// id of the sled
sled_id: SledUuid,
sled_id: SledOpt,
}

#[derive(Debug, Args)]
Expand Down Expand Up @@ -452,10 +453,10 @@ enum BlueprintEditCommands {
/// add a Nexus instance to a particular sled
AddNexus {
/// sled on which to deploy the new instance
sled_id: SledUuid,
sled_id: SledOpt,
},
/// add a CockroachDB instance to a particular sled
AddCockroach { sled_id: SledUuid },
AddCockroach { sled_id: SledOpt },
/// set the image source for a zone
SetZoneImage {
/// id of zone whose image to set
Expand All @@ -466,7 +467,7 @@ enum BlueprintEditCommands {
/// set the remove_mupdate_override field for a sled
SetRemoveMupdateOverride {
/// sled to set the field on
sled_id: SledUuid,
sled_id: SledOpt,

/// the UUID to set the field to, or "unset"
value: MupdateOverrideUuidOpt,
Expand Down Expand Up @@ -521,17 +522,66 @@ enum BlueprintEditDebugCommands {
/// the sled from the blueprint.
RemoveSled {
/// the sled to remove
sled: SledUuid,
sled: SledOpt,
},

/// Bump a sled's generation number, even if nothing else about the sled has
/// changed.
ForceSledGenerationBump {
/// the sled to bump the sled-agent generation number of
sled: SledUuid,
sled: SledOpt,
},
}

/// Identifies a sled in a system, either by UUID or by serial number.
///
/// This is the type of the sled arguments accepted by the CLI commands. It is
/// produced by the `FromStr` impl: input that parses as a `SledUuid` becomes
/// `Uuid`, and any other input becomes `Serial`. Use `to_sled_id` to resolve
/// either form into a `SledUuid`.
#[derive(Clone, Debug)]
enum SledOpt {
/// Identifies a sled by its UUID. Resolution returns this value as-is.
Uuid(SledUuid),
/// Identifies a sled by its serial number. Resolution looks the serial up
/// in a `SystemDescription`.
Serial(String),
}

impl SledOpt {
    /// Resolves this sled option into a sled UUID.
    ///
    /// * `Uuid`: the UUID is returned unchanged; `description` is not
    ///   consulted.
    /// * `Serial`: the serial number is looked up with
    ///   `SystemDescription::serial_to_sled_id`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `serial_to_sled_id` for the `Serial`
    /// variant. The `Uuid` variant never fails.
    fn to_sled_id(
        &self,
        description: &SystemDescription,
    ) -> anyhow::Result<SledUuid> {
        match self {
            Self::Uuid(uuid) => Ok(*uuid),
            // `serial` is already a `&String` here (we matched on `&self`),
            // so pass it directly instead of borrowing it a second time.
            Self::Serial(serial) => description.serial_to_sled_id(serial),
        }
    }
}

impl FromStr for SledOpt {
type Err = Infallible;

fn from_str(s: &str) -> Result<Self, Self::Err> {
// If the sled looks like a UUID, parse it as that.
if let Ok(uuid) = s.parse::<SledUuid>() {
return Ok(SledOpt::Uuid(uuid));
}

// We treat anything that doesn't parse as a UUID as a serial number.
//
// Can we do something more intelligent here, like looking for a
// particular prefix? In principle, yes, but in reality there are
// several different sources of serial numbers:
//
// * simulated sleds ("serial0", "serial1", ...)
// * real sleds ("BRM42220014")
// * a4x2 ("g0", "g1", ...)
// * single-sled dev deployments
//
// and possibly more. We could exhaustively enumerate all of them, but
// it's easier to assume that if it doesn't look like a UUID, it's a
// serial number.
Ok(Self::Serial(s.to_owned()))
}
}

#[derive(Clone, Debug)]
enum BlueprintIdOpt {
/// use the target blueprint
Expand Down Expand Up @@ -905,23 +955,33 @@ fn cmd_sled_add(
let mut state = sim.current_state().to_mut();
let sled_id = add.sled_id.unwrap_or_else(|| state.rng_mut().next_sled_id());
let new_sled = SledBuilder::new().id(sled_id).npools(add.ndisks);
state.system_mut().description_mut().sled(new_sled)?;
let system = state.system_mut();
system.description_mut().sled(new_sled)?;
// Figure out what serial number this sled was assigned.
let added_sled = system
.description()
.get_sled(sled_id)
.expect("we just added this sled");
let serial = match added_sled.sp_state() {
Some((_, sp_state)) => sp_state.serial_number.clone(),
None => "(none)".to_owned(),
};
sim.commit_and_bump(
format!("reconfigurator-cli sled-add: {sled_id}"),
format!("reconfigurator-cli sled-add: {sled_id} (serial: {serial})"),
state,
);

Ok(Some(format!("added sled {}", sled_id)))
Ok(Some(format!("added sled {} (serial: {})", sled_id, serial)))
}

fn cmd_sled_remove(
sim: &mut ReconfiguratorSim,
args: SledRemoveArgs,
) -> anyhow::Result<Option<String>> {
let mut state = sim.current_state().to_mut();
let sled_id = args.sled_id;
state
.system_mut()
let system = state.system_mut();
let sled_id = args.sled_id.to_sled_id(system.description())?;
system
.description_mut()
.sled_remove(sled_id)
.context("failed to remove sled")?;
Expand All @@ -938,7 +998,7 @@ fn cmd_sled_show(
) -> anyhow::Result<Option<String>> {
let state = sim.current_state();
let description = state.system().description();
let sled_id = args.sled_id;
let sled_id = args.sled_id.to_sled_id(description)?;
let sp_active_version = description.sled_sp_active_version(sled_id)?;
let sp_inactive_version = description.sled_sp_inactive_version(sled_id)?;
let planning_input = description
Expand Down Expand Up @@ -966,18 +1026,17 @@ fn cmd_sled_set_policy(
args: SledSetPolicyArgs,
) -> anyhow::Result<Option<String>> {
let mut state = sim.current_state().to_mut();
state
.system_mut()
.description_mut()
.sled_set_policy(args.sled_id, args.policy.into())?;
let system = state.system_mut();
let sled_id = args.sled_id.to_sled_id(system.description())?;
system.description_mut().sled_set_policy(sled_id, args.policy.into())?;
sim.commit_and_bump(
format!(
"reconfigurator-cli sled-set-policy: {} to {}",
args.sled_id, args.policy,
sled_id, args.policy,
),
state,
);
Ok(Some(format!("set sled {} policy to {}", args.sled_id, args.policy)))
Ok(Some(format!("set sled {} policy to {}", sled_id, args.policy)))
}

fn cmd_sled_update_sp(
Expand All @@ -998,26 +1057,24 @@ fn cmd_sled_update_sp(
);

let mut state = sim.current_state().to_mut();
state.system_mut().description_mut().sled_update_sp_versions(
args.sled_id,
let system = state.system_mut();
let sled_id = args.sled_id.to_sled_id(system.description())?;
system.description_mut().sled_update_sp_versions(
sled_id,
args.active,
args.inactive,
)?;

sim.commit_and_bump(
format!(
"reconfigurator-cli sled-update-sp: {}: {}",
args.sled_id,
sled_id,
labels.join(", "),
),
state,
);

Ok(Some(format!(
"set sled {} SP versions: {}",
args.sled_id,
labels.join(", ")
)))
Ok(Some(format!("set sled {} SP versions: {}", sled_id, labels.join(", "))))
}

fn cmd_inventory_list(
Expand Down Expand Up @@ -1226,18 +1283,21 @@ fn cmd_blueprint_edit(

let label = match args.edit_command {
BlueprintEditCommands::AddNexus { sled_id } => {
let sled_id = sled_id.to_sled_id(system.description())?;
builder
.sled_add_zone_nexus(sled_id)
.context("failed to add Nexus zone")?;
format!("added Nexus zone to sled {}", sled_id)
}
BlueprintEditCommands::AddCockroach { sled_id } => {
let sled_id = sled_id.to_sled_id(system.description())?;
builder
.sled_add_zone_cockroachdb(sled_id)
.context("failed to add CockroachDB zone")?;
format!("added CockroachDB zone to sled {}", sled_id)
}
BlueprintEditCommands::SetRemoveMupdateOverride { sled_id, value } => {
let sled_id = sled_id.to_sled_id(system.description())?;
builder
.sled_set_remove_mupdate_override(sled_id, value.into())
.context("failed to set remove_mupdate_override")?;
Expand Down Expand Up @@ -1344,15 +1404,17 @@ fn cmd_blueprint_edit(
BlueprintEditCommands::Debug {
command: BlueprintEditDebugCommands::RemoveSled { sled },
} => {
builder.debug_sled_remove(sled)?;
format!("debug: removed sled {sled} from blueprint")
let sled_id = sled.to_sled_id(system.description())?;
builder.debug_sled_remove(sled_id)?;
format!("debug: removed sled {sled_id} from blueprint")
}
BlueprintEditCommands::Debug {
command:
BlueprintEditDebugCommands::ForceSledGenerationBump { sled },
} => {
builder.debug_sled_force_generation_bump(sled)?;
format!("debug: forced sled {sled} generation bump")
let sled_id = sled.to_sled_id(system.description())?;
builder.debug_sled_force_generation_bump(sled_id)?;
format!("debug: forced sled {sled_id} generation bump")
}
};

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Load example system with 7 sleds:
#
# sled 0: unset -> unset (unchanged)
# sled 1: unset -> set
# sled 2: set -> unset
# sled 3: set -> set (unchanged)
# sled 4: set -> set (changed)
# sled 5: set -> set (unchanged) but change something else
# sled 6: set -> sled removed
# serial0: unset -> unset (unchanged)
# serial1: unset -> set
# serial2: set -> unset
# serial3: set -> set (unchanged)
# serial4: set -> set (changed)
# serial5: set -> set (unchanged) but change something else
# serial6: set -> sled removed
#
# We'll also add another sled below (new_sled_id) with
# remove_mupdate_override set.
Expand All @@ -15,28 +15,29 @@
# outputs minimal.

load-example --nsleds 7 --ndisks-per-sled 0 --no-zones
sled-list

# Set the field on sleds 2-6 (0-indexed).
blueprint-edit latest set-remove-mupdate-override 9a867dc9-d505-427f-9eff-cdb1d4d9bd73 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override aff6c093-197d-42c5-ad80-9f10ba051a34 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override b82ede02-399c-48c6-a1de-411df4fa49a7 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override d81c6a84-79b8-4958-ae41-ea46c9b19763 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override e96e226f-4ed9-4c01-91b9-69a9cd076c9e 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override serial2 00000000-0000-0000-0000-000000000000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this change overall quite a lot; there are other places (e.g., omdb) I've wanted to refer to sleds by serial instead of ID too. This might be too bikesheddy, but I don't love the syntax where UUIDs get parsed as-is and serials have a special prefix. What would you think about either of these?

  • Implicitly parse anything that doesn't look like a UUID as a serial number (so this value would just be 2)
  • Make the kind explicit for both; either something like id:SOME-UUID / serial:SERIAL or as more of a CLI-like argument like --id SOME-UUID / --serial SERIAL

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I agree, in that I like that you can specify a serial number, but I also want to be able to do this with data from real systems whose serials don't start with serial. I don't think actual serials can overlap with uuids, so I like the idea of: if it parses as a uuid, make it a uuid. Otherwise, it's a serial number. @jgallagher was that what you meant as your first bullet?

Copy link
Contributor

@jgallagher jgallagher Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep! Although I misunderstood the serial prefix here. I saw the parsing check for it as a prefix and assumed it stripped that prefix (e.g., the serial number of this simulated sled is actually 2), but going back you're right that this only works if serials are literally serialSomething. So my first bullet is off a little because of that misunderstanding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point re real sleds — thoughts on using the BRM prefix? Mostly I was concerned about a slightly typo'd UUID causing it to be treated as a serial, resulting in confusing error messages.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using BRM as a prefix for simulated sleds seems a little sketchy to me. A slightly typo'd UUID seems like it would give a pretty clear error message, right? "No sled with serial $ALMOST_A_UUID"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I mean "serial" for simulated sleds and "BRM" for real sleds loaded in via JSON.

Copy link
Contributor Author

@sunshowers sunshowers Jul 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using BRM as a prefix for simulated sleds seems a little sketchy to me. A slightly typo'd UUID seems like it would give a pretty clear error message, right? "No sled with serial $ALMOST_A_UUID"?

I do think that's a bit more confusing than the error messages produced by explicitly checking for the serial and BRM prefixes, but maybe it's okay anyway.

edit: what I mean is changing this to:

  • if it parses as a UUID, use that
  • otherwise, if it has a serial prefix, treat it as a serial number
  • otherwise, if it has a BRM prefix, treat it as a serial number
  • otherwise, error out

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ugh, sorry for just remembering this, but there are at least three other sources of serial numbers:

  • a4x2 (IIRC, these are g0, g1, ...?)
  • single-sled dev deployments (I have no idea)
  • the Canada region (I have no idea, although @jmpesp could probably tell us)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh, hmm. In that case I guess treating any non-UUID strings as serials makes the most sense.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done -- the error messages are something like

〉sled-show a3a47d70-a5f4-42e2-b8fa-32a92795c88
error: sled not found with serial a3a47d70-a5f4-42e2-b8fa-32a92795c88 (known serials: serial0, serial1, serial2)

which seems fine.

blueprint-edit latest set-remove-mupdate-override serial3 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override serial4 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override serial5 00000000-0000-0000-0000-000000000000
blueprint-edit latest set-remove-mupdate-override serial6 00000000-0000-0000-0000-000000000000

blueprint-show latest

# Now make another blueprint, starting by adding a new sled and removing sled 6.
sled-add --ndisks 0
blueprint-edit latest debug remove-sled e96e226f-4ed9-4c01-91b9-69a9cd076c9e
sled-remove e96e226f-4ed9-4c01-91b9-69a9cd076c9e
blueprint-edit latest debug remove-sled serial6
sled-remove serial6
inventory-generate

# Edit sleds 1, 2, 4, 5, and the new one.
blueprint-edit latest set-remove-mupdate-override 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 ffffffff-ffff-ffff-ffff-ffffffffffff
blueprint-edit latest set-remove-mupdate-override 9a867dc9-d505-427f-9eff-cdb1d4d9bd73 unset
blueprint-edit latest set-remove-mupdate-override b82ede02-399c-48c6-a1de-411df4fa49a7 ffffffff-ffff-ffff-ffff-ffffffffffff
blueprint-edit latest debug force-sled-generation-bump d81c6a84-79b8-4958-ae41-ea46c9b19763
blueprint-edit latest set-remove-mupdate-override 00320471-945d-413c-85e7-03e091a70b3c ffffffff-ffff-ffff-ffff-ffffffffffff
blueprint-edit latest set-remove-mupdate-override serial1 ffffffff-ffff-ffff-ffff-ffffffffffff
blueprint-edit latest set-remove-mupdate-override serial2 unset
blueprint-edit latest set-remove-mupdate-override serial4 ffffffff-ffff-ffff-ffff-ffffffffffff
blueprint-edit latest debug force-sled-generation-bump serial5
blueprint-edit latest set-remove-mupdate-override serial7 ffffffff-ffff-ffff-ffff-ffffffffffff

blueprint-diff 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba latest

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ f45ba181-4b56-42cc-a762-874d90184a43 0 <REDACTED_TIMESTAMP>

> # Add a new sled with no disks.
> sled-add --ndisks 0
added sled 00320471-945d-413c-85e7-03e091a70b3c
added sled 00320471-945d-413c-85e7-03e091a70b3c (serial: serial3)


> # Generate a new inventory collection that includes that sled.
Expand Down
Loading
Loading