Skip to content

Commit

Permalink
Merge branch 'labgrid-project:master' into available-places
Browse files Browse the repository at this point in the history
  • Loading branch information
istepic authored Feb 4, 2025
2 parents 06b414b + de5257d commit ce322ba
Show file tree
Hide file tree
Showing 22 changed files with 181 additions and 89 deletions.
12 changes: 6 additions & 6 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Release 24.1 (Unreleased)
Release 25.0 (Unreleased)
-------------------------
As announced
`before <https://github.com/labgrid-project/labgrid/discussions/1467#discussioncomment-10314852>`_,
Expand All @@ -14,25 +14,25 @@ That's why labgrid moves to gRPC with this release. gRPC is a well maintained
RPC framework with a lot of users. As a side effect, the message transfer is
more performant and the import times are shorter.

New Features in 24.1
New Features in 25.0
~~~~~~~~~~~~~~~~~~~~
- All components can be installed into the same virtualenv again.
- The `QEMUDriver` now supports setting the ``display`` option to
``qemu-default``, which will neither set the QEMU ``-display`` option
nor pass along ``-nographic``.

Bug fixes in 24.1
Bug fixes in 25.0
~~~~~~~~~~~~~~~~~

FIXME

Breaking changes in 24.1
Breaking changes in 25.0
~~~~~~~~~~~~~~~~~~~~~~~~
Maintaining support for both crossbar/autobahn as well as gRPC in labgrid would
be a lot of effort due to the different architectures of those frameworks.
Therefore, a hard migration to gRPC is deemed the lesser issue.

Due to the migration, 24.1 includes the following breaking changes:
Due to the migration, 25.0 includes the following breaking changes:

- The labgrid environment config option ``crossbar_url`` was renamed to
``coordinator_address``. The environment variable ``LG_CROSSBAR`` was renamed
Expand All @@ -51,7 +51,7 @@ Other breaking changes include:
removed) xdrlib. See
`issue #1507 <https://github.com/labgrid-project/labgrid/issues/1507>`_.

Known issues in 24.1
Known issues in 25.0
~~~~~~~~~~~~~~~~~~~~

FIXME
Expand Down
4 changes: 2 additions & 2 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
labgrid (24.1.0) UNRELEASED; urgency=low
labgrid (25.0.0) UNRELEASED; urgency=low

* See https://github.com/labgrid-project/labgrid/blob/master/CHANGES.rst

-- Bastian Krause <[email protected]> Tue, 13 Aug 2024 12:23:25 +0200
-- Bastian Krause <[email protected]> Fri, 21 Jan 2024 10:43:45 +0100

labgrid (24.0.0) UNRELEASED; urgency=low

Expand Down
6 changes: 3 additions & 3 deletions debian/copyright
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ Upstream-Name: labgrid
Source: https://github.com/labgrid-project/labgrid

Files: *
Copyright: Copyright (C) 2016-2024 Pengutronix, Jan Luebbe <[email protected]>
Copyright (C) 2016-2024 Pengutronix, Rouven Czerwinski <[email protected]>
Copyright: Copyright (C) 2016-2025 Pengutronix, Jan Luebbe <[email protected]>
Copyright (C) 2016-2025 Pengutronix, Rouven Czerwinski <[email protected]>
License: LGPL-2.1+

Files: man/*
Copyright: Copyright (C) 2016-2024 Pengutronix
Copyright: Copyright (C) 2016-2025 Pengutronix
License: LGPL-2.1+

License: LGPL-2.1+
Expand Down
2 changes: 1 addition & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@

# General information about the project.
project = 'labgrid'
copyright = '2016-2024 Pengutronix, Jan Luebbe and Rouven Czerwinski'
copyright = '2016-2025 Pengutronix, Jan Luebbe and Rouven Czerwinski'
author = 'Jan Luebbe, Rouven Czerwinski'

# The version info for the project you're documenting, acts as replacement for
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ has_buildx() {
local docker_cmd
docker_cmd="${1}"

"${docker_cmd}" buildx --help >/dev/null 2>&1
"${docker_cmd}" buildx version >/dev/null 2>&1
}

get_docker_cmd() {
Expand Down
2 changes: 1 addition & 1 deletion labgrid/driver/networkinterfacedriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def configure(self, settings):
self.proxy.configure(self.iface.ifname, settings)

@Driver.check_active
@step()
@step(args=["expected"])
def wait_state(self, expected, timeout=60):
"""Wait until the expected state is reached or the timeout expires.
Expand Down
15 changes: 11 additions & 4 deletions labgrid/remote/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,13 @@ def _check_allowed(self, place):
if f"{self.gethostname()}/{self.getuser()}" not in place.allowed:
host, user = place.acquired.split("/")
if user != self.getuser():
raise UserError(f"place {place.name} is not acquired by your user, acquired by {user}")
raise UserError(
f"place {place.name} is not acquired by your user, acquired by {user}. To work simultaneously, {user} can execute labgrid-client -p {place.name} allow {self.gethostname()}/{self.getuser()}"
)
if host != self.gethostname():
raise UserError(f"place {place.name} is not acquired on this computer, acquired on {host}")
raise UserError(
f"place {place.name} is not acquired on this computer, acquired on {host}. To allow this host, use labgrid-client -p {place.name} allow {self.gethostname()}/{self.getuser()} on the other host"
)

def get_place(self, place=None):
pattern = place or self.args.place
Expand All @@ -470,7 +474,10 @@ def get_place(self, place=None):
def get_idle_place(self, place=None):
place = self.get_place(place)
if place.acquired:
raise UserError(f"place {place.name} is not idle (acquired by {place.acquired})")
_, user = place.acquired.split("/")
raise UserError(
f"place {place.name} is not idle (acquired by {place.acquired}). To work simultaneously, {user} can execute labgrid-client -p {place.name} allow {self.gethostname()}/{self.getuser()}"
)
return place

def get_acquired_place(self, place=None):
Expand Down Expand Up @@ -1427,7 +1434,7 @@ async def create_reservation(self):
raise UserError(f"'{pair}' is not a valid filter (must contain a '=')")
if not TAG_KEY.match(k):
raise UserError(f"Key '{k}' in filter '{pair}' is invalid")
if not TAG_KEY.match(v):
if not TAG_VAL.match(v):
raise UserError(f"Value '{v}' in filter '{pair}' is invalid")
fltr[k] = v

Expand Down
167 changes: 125 additions & 42 deletions labgrid/remote/coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import time
from contextlib import contextmanager
import copy
import random

import attr
import grpc
Expand All @@ -26,7 +27,7 @@
from .scheduler import TagSet, schedule
from .generated import labgrid_coordinator_pb2
from .generated import labgrid_coordinator_pb2_grpc
from ..util import atomic_replace, labgrid_version, yaml
from ..util import atomic_replace, labgrid_version, yaml, Timeout


@contextmanager
Expand Down Expand Up @@ -220,7 +221,7 @@ def __init__(self) -> None:
self.load()

self.loop = asyncio.get_running_loop()
for name in ["save", "reacquire", "schedule"]:
for name in ["save", "sync_resources", "schedule"]:
step_func = getattr(self, f"_poll_step_{name}")
task = self.loop.create_task(self.poll(step_func), name=f"coordinator-poll-{name}")
self.poll_tasks.append(task)
Expand All @@ -231,11 +232,11 @@ async def _poll_step_save(self):
with warn_if_slow("save changes", level=logging.DEBUG):
await self.save()

async def _poll_step_reacquire(self):
# try to re-acquire orphaned resources
async def _poll_step_sync_resources(self):
# try to synchronize resources
async with self.lock:
with warn_if_slow("reacquire orphaned resources", limit=3.0):
await self._reacquire_orphaned_resources()
with warn_if_slow("synchronize resources", limit=3.0):
await self._synchronize_resources()

async def _poll_step_schedule(self):
# update reservations
Expand Down Expand Up @@ -638,6 +639,14 @@ async def _acquire_resources(self, place, resources):
if resource.acquired:
return False

for otherplace in self.places.values():
for oldres in otherplace.acquired_resources:
if resource.path == oldres.path:
logging.info(
"Conflicting orphaned resource %s for acquire request for place %s", oldres, place.name
)
return False

# acquire resources
acquired = []
try:
Expand Down Expand Up @@ -692,47 +701,124 @@ async def _release_resources(self, place, resources, callback=True):
except:
logging.exception("failed to publish released resource %s", resource)

async def _reacquire_orphaned_resources(self):
async def _synchronize_resources(self):
assert self.lock.locked()

for place in self.places.values():
changed = False
# fix:
# - a resource is acquired for a place that is not acquired
# * perhaps caused by a resource acquire timeout (during normal lock)
# -> release()
# - a resource is acquired for a place that still has it as orphaned
# * perhaps caused by a resource acquire timeout (during reacquire)
# -> replace orphaned resource
# - a resource is released, but a place still has it as orphaned
# * perhaps caused by a exporter restart
# -> acquire() and replace orphaned resource

acquired_resources = {}
used_resources = {}
orphaned_resources = {}

# find acquired resources
for exporter in self.exporters.values():
for group in exporter.groups.values():
for resource in group.values():
if resource.acquired:
acquired_resources[resource.path] = resource

for idx, resource in enumerate(place.acquired_resources):
# find resources used by places
for place in self.places.values():
for resource in place.acquired_resources:
if not resource.orphaned:
continue
used_resources[resource.path] = resource
else:
orphaned_resources[resource.path] = resource

timeout = Timeout(5.0)

# find resources to be released
to_release = list(acquired_resources.keys() - used_resources.keys() - orphaned_resources.keys())
if to_release:
logging.info("synchronize resources: %s acquired resource(s) should be released", len(to_release))
random.shuffle(to_release) # don't get stuck on a problematic resource
for resource_path in to_release:
if timeout.expired:
continue # release the coordinator lock

resource = acquired_resources[resource_path]
if resource.acquired == "<broken>":
continue
place = self.places.get(resource.acquired)
print(f"should release {resource} for {place}?")

# is the exporter connected again?
exporter = self.get_exporter_by_name(resource.path[0])
if not exporter:
continue
if place is None:
logging.warning("resource %s claims to be acquired by unknown place", resource)
elif not place.acquired:
logging.warning("resource %s claims to be acquired by unacquired place", resource)
else:
continue
try:
await self._release_resources(place, [resource])
del acquired_resources[resource_path]
except Exception:
logging.exception("failed to release unused resource %s", resource)
break

# does the resource exist again?
try:
new_resource = exporter.groups[resource.path[1]][resource.path[3]]
except KeyError:
continue
# find orphaned resources to be acquired
to_acquire = list(orphaned_resources.keys() - acquired_resources.keys())
if to_acquire:
logging.info("synchronize resources: %s orphaned resource(s) should be acquired", len(to_acquire))
random.shuffle(to_acquire) # don't get stuck on a problematic resource
for resource_path in to_acquire:
if timeout.expired:
continue # release the coordinator lock

resource = orphaned_resources[resource_path]
if resource.acquired == "<broken>":
continue
place = self.places.get(resource.acquired)
assert place is not None
assert place.acquired
print(f"should acquire {resource} for {place}?")

# is the exporter connected again?
exporter = self.get_exporter_by_name(resource.path[0])
if not exporter:
continue

if new_resource.acquired:
# this should only happen when resources become broken
logging.debug("ignoring acquired/broken resource %s for place %s", new_resource, place.name)
continue
# does the resource exist again?
try:
new_resource = exporter.groups[resource.path[1]][resource.path[3]]
except KeyError:
continue

try:
await self._acquire_resource(place, new_resource)
place.acquired_resources[idx] = new_resource
except Exception:
logging.exception(
"failed to reacquire orphaned resource %s for place %s", new_resource, place.name
)
break

logging.info("reacquired orphaned resource %s for place %s", new_resource, place.name)
changed = True

if changed:
self._publish_place(place)
self.save_later()
if new_resource.acquired:
# this should only happen when resources become broken
logging.warning("ignoring acquired/broken resource %s for place %s", new_resource, place.name)
continue

try:
await self._acquire_resource(place, new_resource)
acquired_resources[new_resource.path] = new_resource
except Exception:
logging.exception("failed to reacquire orphaned resource %s for place %s", new_resource, place.name)
break

# find orphaned resources to be replaced in the places
to_replace = set(orphaned_resources.keys() & acquired_resources.keys())
if to_replace:
logging.info("synchronize resources: %s orphaned resource(s) should be replaced", len(to_replace))
for resource_path in set(orphaned_resources.keys() & acquired_resources.keys()):
oldresource = orphaned_resources[resource_path]
newresource = acquired_resources[resource_path]
assert oldresource.acquired == newresource.acquired

place = self.places.get(newresource.acquired)
assert place is not None
assert place.acquired

idx = place.acquired_resources.index(oldresource)
place.acquired_resources[idx] = newresource

@locked
async def AcquirePlace(self, request, context):
Expand All @@ -755,9 +841,6 @@ async def AcquirePlace(self, request, context):
if not res.owner == username:
await context.abort(grpc.StatusCode.PERMISSION_DENIED, f"Place {name} was not reserved for {username}")

# First try to reacquire orphaned resources to avoid conflicts.
await self._reacquire_orphaned_resources()

# FIXME use the session object instead? or something else which
# survives disconnecting clients?
place.acquired = username
Expand Down
8 changes: 8 additions & 0 deletions labgrid/resource/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ class NetworkSigrokUSBDevice(RemoteUSBResource):
default=None,
validator=attr.validators.optional(attr.validators.instance_of(str))
)
channel_group = attr.ib(
default=None,
validator=attr.validators.optional(attr.validators.instance_of(str))
)
def __attrs_post_init__(self):
self.timeout = 10.0
super().__attrs_post_init__()
Expand All @@ -203,6 +207,10 @@ class NetworkSigrokUSBSerialDevice(RemoteUSBResource):
default=None,
validator=attr.validators.optional(attr.validators.instance_of(str))
)
channel_group = attr.ib(
default=None,
validator=attr.validators.optional(attr.validators.instance_of(str))
)
def __attrs_post_init__(self):
self.timeout = 10.0
super().__attrs_post_init__()
Expand Down
Loading

0 comments on commit ce322ba

Please sign in to comment.