Skip to content

Commit 5b9b078

Browse files
committed
Improve Test test_create_qemu_instance_online change sig to async
Change sig of enable_and_start to async. Adapt Firecracker instance test: - Rename to test_create_firecracker_instance - Use mocker to patch() settings so it doesn't contaminate other tests - Ensure it pings properly to confirm it is working
1 parent 2e7edd2 commit 5b9b078

File tree

6 files changed

+82
-58
lines changed

6 files changed

+82
-58
lines changed

src/aleph/vm/models.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,10 +336,10 @@ async def start(self):
336336
await self.vm.start_guest_api()
337337

338338
# Start VM and snapshots automatically
339-
# If the execution is confidential, don't start it because we need to wait for the session certificate
340-
# files, use the endpoint /control/machine/{ref}/confidential/initialize to get session files and start the VM
339+
# If the execution is a confidential instance, it is started later in the process when the session certificate
340+
# files are received from the client via the /control/machine/{ref}/confidential/initialize endpoint
341341
if self.persistent and not self.is_confidential and self.systemd_manager:
342-
self.systemd_manager.enable_and_start(self.controller_service)
342+
await self.systemd_manager.enable_and_start(self.controller_service)
343343

344344
if self.is_program:
345345
await self.wait_for_init()
@@ -354,6 +354,7 @@ async def start(self):
354354
self.ready_event.set()
355355
await self.save()
356356
except Exception:
357+
logger.exception("%s error during start, tearing down", self)
357358
await self.vm.teardown()
358359
await self.vm.stop_guest_api()
359360
raise
@@ -392,11 +393,13 @@ async def non_blocking_wait_for_boot(self):
392393
await self.wait_for_persistent_boot()
393394
logger.info("%s responded to ping. Marking it as started.", self)
394395
self.times.started_at = datetime.now(tz=timezone.utc)
396+
return True
395397
# await self.save()
396398
except Exception as e:
397399
logger.warning("%s failed to responded to ping or is not running, stopping it.: %s ", self, e)
398400
assert self.vm
399401
await self.stop()
402+
return False
400403

401404
async def wait_for_init(self):
402405
assert self.vm, "The VM attribute has to be set before calling wait_for_init()"

src/aleph/vm/orchestrator/views/operator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ async def operate_confidential_initialize(request: web.Request, authenticated_se
260260
godh_file_path = vm_session_path / "vm_godh.b64"
261261
godh_file_path.write_bytes(godh_file_content.file.read())
262262

263-
pool.systemd_manager.enable_and_start(execution.controller_service)
263+
await pool.systemd_manager.enable_and_start(execution.controller_service)
264264

265265
return web.Response(status=200, body=f"Started VM with ref {vm_hash}")
266266

src/aleph/vm/systemd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def is_service_active(self, service: str) -> bool:
7272
logger.error(error)
7373
return False
7474

75-
def enable_and_start(self, service: str) -> None:
75+
async def enable_and_start(self, service: str) -> None:
7676
if not self.is_service_enabled(service):
7777
self.enable(service)
7878
if not self.is_service_active(service):

tests/supervisor/test_instance.py

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ class MockSystemDManager(SystemDManager):
2323
execution: MicroVM | None = None
2424
process: Process | None = None
2525

26-
async def enable_and_start(self, vm_hash: str):
26+
async def enable_and_start(self, service: str) -> tuple[MicroVM | None, Process | None]:
27+
vm_hash = service.split("@", maxsplit=1)[1].split(".", maxsplit=1)[0]
28+
2729
config_path = Path(f"{settings.EXECUTION_ROOT}/{vm_hash}-controller.json")
2830
config = configuration_from_file(config_path)
2931
self.execution, self.process = await execute_persistent_vm(config)
@@ -35,7 +37,7 @@ def is_service_enabled(self, service: str):
3537
def is_service_active(self, service: str):
3638
return self.process is not None
3739

38-
async def stop_and_disable(self, vm_hash: str):
40+
async def stop_and_disable(self, service: str):
3941
if self.execution:
4042
await self.execution.shutdown()
4143
await self.execution.stop()
@@ -45,25 +47,31 @@ async def stop_and_disable(self, vm_hash: str):
4547

4648

4749
@pytest.mark.asyncio
48-
async def test_create_instance():
49-
"""
50-
Create a fake instance locally and check that it start / init / stop properly.
50+
async def test_create_firecracker_instance(mocker):
51+
"""Create a fake instance locally and check that it starts / inits / stops properly.
52+
53+
NOTE: If the Firecracker VM fails to boot because the disk is broken, try:
54+
```bash
55+
sudo dmsetup remove decadecadecadecadecadecadecadecadecadecadecadecadecadecadecadeca_rootfs
56+
sudo dmsetup remove decadecadecadecadecadecadecadecadecadecadecadecadecadecadecadeca_base
57+
sudo losetup -l | grep 'persistent' | grep deleted | awk '{print $1}' | sudo xargs -I{} losetup -d {}
58+
sudo rm -rf /var/lib/aleph/vm/volumes/persistent/decadecadecadecadecadecadecadecadecadecadecadecadecadecadecadeca/rootfs.btrfs
59+
```
5160
"""
61+
mocker.patch.object(settings, "ALLOW_VM_NETWORKING", True)
62+
mocker.patch.object(settings, "USE_FAKE_INSTANCE_BASE", True)
63+
mocker.patch.object(settings, "FAKE_DATA_PROGRAM", settings.BENCHMARK_FAKE_DATA_PROGRAM)
64+
mocker.patch.object(settings, "USE_JAILER", True)
5265

53-
settings.USE_FAKE_INSTANCE_BASE = True
54-
settings.FAKE_DATA_PROGRAM = settings.BENCHMARK_FAKE_DATA_PROGRAM
55-
# settings.FAKE_INSTANCE_MESSAGE
56-
settings.ALLOW_VM_NETWORKING = True
57-
settings.USE_JAILER = True
58-
59-
logging.basicConfig(level=logging.DEBUG)
60-
settings.PRINT_SYSTEM_LOGS = True
66+
# logging.basicConfig(level=logging.DEBUG)
6167

6268
# Ensure that the settings are correct and required files present.
6369
settings.setup()
6470
settings.check()
6571
if not settings.FAKE_INSTANCE_BASE.exists():
66-
pytest.xfail("Test Runtime not setup. run `cd runtimes/instance-rootfs && sudo ./create-debian-12-disk.sh`")
72+
pytest.xfail(
73+
f"Test Runtime not setup. {settings.FAKE_INSTANCE_BASE}. run `cd runtimes/instance-rootfs && sudo ./create-debian-12-disk.sh`"
74+
)
6775

6876
# The database is required for the metrics and is currently not optional.
6977
engine = metrics.setup_engine()
@@ -93,7 +101,7 @@ async def test_create_instance():
93101
message=message.content,
94102
original=message.content,
95103
snapshot_manager=None,
96-
systemd_manager=None,
104+
systemd_manager=mock_systemd_manager,
97105
persistent=True,
98106
)
99107

@@ -114,13 +122,17 @@ async def test_create_instance():
114122
assert vm.enable_networking
115123

116124
await execution.start()
117-
firecracker_execution, process = await mock_systemd_manager.enable_and_start(execution.vm_hash)
125+
# firecracker_execution, process = await mock_systemd_manager.enable_and_start(execution.vm_hash)
126+
firecracker_execution = mock_systemd_manager.execution
118127
assert isinstance(firecracker_execution, MicroVM)
119128
assert firecracker_execution.proc is not None
120-
await execution.wait_for_init()
121129

122-
# This sleep is to leave the instance to boot up and prevent disk corruption
130+
await execution.init_task
131+
assert execution.init_task.result() is True, "VM failed to start"
132+
133+
# This sleep is to leave the instance to boot
134+
# up and prevent disk corruption
123135
await asyncio.sleep(60)
124-
firecracker_execution, process = await mock_systemd_manager.stop_and_disable(execution.vm_hash)
136+
firecracker_execution, process = await mock_systemd_manager.stop_and_disable(execution.controller_service)
125137
await execution.stop()
126138
assert firecracker_execution is None

tests/supervisor/test_qemu_instance.py

Lines changed: 41 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,13 @@ class MockSystemDManager(SystemDManager):
2323
execution: QemuVM | None = None
2424
process: Process | None = None
2525

26-
async def enable_and_start(self, vm_hash: str):
26+
async def enable_and_start(self, service: str) -> tuple[QemuVM | None, Process | None]:
27+
# aleph-vm-controller@decadecadecadecadecadecadecadecadecadecadecadecadecadecadecadeca.service-controller.json
28+
if '@' in service:
29+
vm_hash = service.split('@', maxsplit=1)[1].split('.', maxsplit=1)[0]
30+
else:
31+
vm_hash = service
32+
2733
config_path = Path(f"{settings.EXECUTION_ROOT}/{vm_hash}-controller.json")
2834
config = configuration_from_file(config_path)
2935
self.execution, self.process = await execute_persistent_vm(config)
@@ -33,7 +39,7 @@ def is_service_enabled(self, service: str):
3339
return self.process is not None
3440

3541
def is_service_active(self, service: str):
36-
return self.process is not None
42+
return self.process is not None and not self.process.returncode
3743

3844
async def stop_and_disable(self, vm_hash: str):
3945
if self.process:
@@ -92,34 +98,39 @@ async def test_create_qemu_instance():
9298
assert vm.vm_id == vm_id
9399

94100
await execution.start()
95-
qemu_execution, process = await mock_systemd_manager.enable_and_start(execution.vm_hash)
101+
qemu_execution, process = await mock_systemd_manager.enable_and_start(execution.controller_service)
96102
assert isinstance(qemu_execution, QemuVM)
97103
assert qemu_execution.qemu_process is not None
98-
qemu_execution, process = await mock_systemd_manager.stop_and_disable(execution.vm_hash)
104+
await mock_systemd_manager.stop_and_disable(execution.vm_hash)
105+
await qemu_execution.qemu_process.wait()
106+
assert qemu_execution.qemu_process.returncode is not None
99107
await execution.stop()
100-
assert qemu_execution is None
101108

102109

103110
@pytest.mark.asyncio
104-
async def test_create_qemu_instance_online():
111+
async def test_create_qemu_instance_online(mocker):
105112
"""
106113
Create an instance and check that it starts / inits / stops properly.
114+
With network, wait for ping
107115
"""
108-
109-
settings.USE_FAKE_INSTANCE_BASE = True
110-
settings.FAKE_INSTANCE_MESSAGE = settings.FAKE_INSTANCE_QEMU_MESSAGE
111-
settings.FAKE_INSTANCE_BASE = settings.FAKE_QEMU_INSTANCE_BASE
112-
settings.ENABLE_CONFIDENTIAL_COMPUTING = False
113-
settings.ALLOW_VM_NETWORKING = True
114-
settings.USE_JAILER = False
116+
mocker.patch.object(settings, "ALLOW_VM_NETWORKING", True)
117+
mocker.patch.object(settings, "USE_FAKE_INSTANCE_BASE", True)
118+
mocker.patch.object(settings, "FAKE_INSTANCE_MESSAGE", settings.FAKE_INSTANCE_QEMU_MESSAGE)
119+
mocker.patch.object(settings, "FAKE_INSTANCE_BASE", settings.FAKE_INSTANCE_QEMU_MESSAGE)
120+
mocker.patch.object(settings, "ENABLE_CONFIDENTIAL_COMPUTING", False)
121+
mocker.patch.object(settings, "USE_JAILER", False)
115122

116123
logging.basicConfig(level=logging.DEBUG)
117124

118125
# Ensure that the settings are correct and required files present.
119126
settings.setup()
120127
settings.check()
121128
if not settings.FAKE_INSTANCE_BASE.exists():
122-
pytest.xfail("Test Runtime not setup. run `cd runtimes/instance-rootfs && sudo ./create-debian-12-disk.sh`")
129+
pytest.xfail(
130+
"Test instance disk {} not setup. run `cd runtimes/instance-rootfs && sudo ./create-debian-12-disk.sh` ".format(
131+
settings.FAKE_QEMU_INSTANCE_BASE
132+
)
133+
)
123134

124135
# The database is required for the metrics and is currently not optional.
125136
engine = metrics.setup_engine()
@@ -130,29 +141,26 @@ async def test_create_qemu_instance_online():
130141

131142
mock_systemd_manager = MockSystemDManager()
132143

133-
network = (
134-
Network(
135-
vm_ipv4_address_pool_range=settings.IPV4_ADDRESS_POOL,
136-
vm_network_size=settings.IPV4_NETWORK_PREFIX_LENGTH,
137-
external_interface=settings.NETWORK_INTERFACE,
138-
ipv6_allocator=make_ipv6_allocator(
139-
allocation_policy=settings.IPV6_ALLOCATION_POLICY,
140-
address_pool=settings.IPV6_ADDRESS_POOL,
141-
subnet_prefix=settings.IPV6_SUBNET_PREFIX,
142-
),
143-
use_ndp_proxy=False,
144-
ipv6_forwarding_enabled=False,
145-
)
146-
if settings.ALLOW_VM_NETWORKING
147-
else None
144+
network = Network(
145+
vm_ipv4_address_pool_range=settings.IPV4_ADDRESS_POOL,
146+
vm_network_size=settings.IPV4_NETWORK_PREFIX_LENGTH,
147+
external_interface=settings.NETWORK_INTERFACE,
148+
ipv6_allocator=make_ipv6_allocator(
149+
allocation_policy=settings.IPV6_ALLOCATION_POLICY,
150+
address_pool=settings.IPV6_ADDRESS_POOL,
151+
subnet_prefix=settings.IPV6_SUBNET_PREFIX,
152+
),
153+
use_ndp_proxy=False,
154+
ipv6_forwarding_enabled=False,
148155
)
156+
network.setup()
149157

150158
execution = VmExecution(
151159
vm_hash=vm_hash,
152160
message=message.content,
153161
original=message.content,
154162
snapshot_manager=None,
155-
systemd_manager=None,
163+
systemd_manager=mock_systemd_manager,
156164
persistent=True,
157165
)
158166

@@ -170,10 +178,11 @@ async def test_create_qemu_instance_online():
170178
assert vm.vm_id == vm_id
171179

172180
await execution.start()
173-
qemu_execution, process = await mock_systemd_manager.enable_and_start(execution.vm_hash)
181+
qemu_execution = mock_systemd_manager.execution
174182
assert isinstance(qemu_execution, QemuVM)
175183
assert qemu_execution.qemu_process is not None
176-
await execution.wait_for_init()
184+
await execution.init_task
185+
assert execution.init_task.result() is True, "VM failed to start"
177186
qemu_execution, process = await mock_systemd_manager.stop_and_disable(execution.vm_hash)
178187
await execution.stop()
179188
assert qemu_execution is None

tests/supervisor/views/test_operator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ async def test_operator_confidential_initialize_not_confidential(aiohttp_client,
219219

220220

221221
@pytest.mark.asyncio
222-
async def test_operator_confidential_initialize(aiohttp_client):
222+
async def test_operator_confidential_initialize(aiohttp_client, mocker):
223223
"""Test that the certificates system endpoint responds. No auth needed"""
224224

225225
settings.ENABLE_QEMU_SUPPORT = True
@@ -236,7 +236,7 @@ class FakeExecution:
236236
controller_service: str = ""
237237

238238
class MockSystemDManager:
239-
enable_and_start = MagicMock(return_value=True)
239+
enable_and_start = mocker.AsyncMock(return_value=True)
240240

241241
class FakeVmPool:
242242
executions: dict[ItemHash, FakeExecution] = {}

0 commit comments

Comments
 (0)