Skip to content

Commit

Permalink
Fix failing import in test (See CI failure https://github.com/kinu-ga…
Browse files Browse the repository at this point in the history
  • Loading branch information
130s committed Feb 24, 2023
1 parent 8662757 commit ae82cde
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 18 deletions.
1 change: 1 addition & 0 deletions computer_hw/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@
<exec_depend>diagnostic_aggregator</exec_depend>
<exec_depend>libsensors_monitor</exec_depend>
<exec_depend>rospy</exec_depend>
<test_depend>roslib</test_depend>

</package>
29 changes: 17 additions & 12 deletions computer_hw/src/computer_hw/nvidia_temperature_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from diagnostic_msgs.msg import DiagnosticArray, DiagnosticStatus, KeyValue
import rospy

from computer_hw.gpu_util import Nvidia_GPU_Stat
from computer_hw.gpu_util import GPUStatusHandler


class NVidiaTempMonitor(object):
Expand Down Expand Up @@ -111,33 +111,38 @@ def pub_status(self):
def parse_smi_output(output):
gpu_stat = GPUStatus()

gpu_stat.product_name = _find_val(output, 'Product Name')
gpu_stat.pci_device_id = _find_val(output, 'PCI Device/Vendor ID')
gpu_stat.pci_location = _find_val(output, 'PCI Location ID')
gpu_stat.display = _find_val(output, 'Display')
gpu_stat.driver_version = _find_val(output, 'Driver Version')
gpu_stat.product_name = GPUStatusHandler._find_val(output, 'Product Name')
gpu_stat.pci_device_id = GPUStatusHandler._find_val(output, 'PCI Device/Vendor ID')
gpu_stat.pci_location = GPUStatusHandler._find_val(output, 'PCI Location ID')
gpu_stat.display = GPUStatusHandler._find_val(output, 'Display')
gpu_stat.driver_version = GPUStatusHandler._find_val(output, 'Driver Version')

TEMPERATURE_QUERIES = ["Temperature", "GPU Current Temp"]
for query in TEMPERATURE_QUERIES:
temp_str = _find_val(output, query)
temp_str = GPUStatusHandler._find_val(output, query)
if temp_str:
temp, units = temp_str.split()
try:
temp, units = temp_str.split()
except ValueError as e:
# Not sure but there seems to be a case where a single ,
# non-splittable value gets returned.
temp = temp_str.split()
gpu_stat.temperature = int(temp)
break

fan_str = _find_val(output, 'Fan Speed')
fan_str = GPUStatusHandler._find_val(output, 'Fan Speed')
if fan_str:
# Fan speed in RPM
fan_spd = float(fan_str.strip('\%').strip()) * 0.01 * MAX_FAN_RPM
fan_spd = float(fan_str.strip('\%').strip()) * 0.01 * GPUStatusHandler.MAX_FAN_RPM
# Convert fan speed to Hz
gpu_stat.fan_speed = _rpm_to_rads(fan_spd)

usage_str = _find_val(output, 'GPU')
usage_str = GPUStatusHandler._find_val(output, 'GPU')
if usage_str:
usage = usage_str.strip('\%').strip()
gpu_stat.gpu_usage = int(usage)

mem_str = _find_val(output, 'Memory')
mem_str = GPUStatusHandler._find_val(output, 'Memory')
if mem_str:
mem = mem_str.strip('\%').strip()
gpu_stat.memory_usage = int(mem)
Expand Down
15 changes: 9 additions & 6 deletions computer_hw/test/parse_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@
import sys
import unittest

import roslib

import computer_hw
from computer_hw.nvidia_temperature_monitor import NVidiaTempMonitor, parse_smi_output

TEXT_PATH = 'test/sample_output/nvidia_smi_out_2021.txt'
TEXT_HIGH_TEMP_PATH = 'test/sample_output/nvidia_smi_high_temp.txt'
Expand All @@ -58,7 +61,7 @@ def setUp(self):
self.high_temp_data = f.read()

def test_parse(self):
gpu_stat = computer_hw.parse_smi_output(self.data)
gpu_stat = parse_smi_output(self.data)

# Check valid
self.assert_(self.data, "Unable to read sample output, no test to run")
Expand All @@ -73,12 +76,12 @@ def test_parse(self):
self.assert_(gpu_stat.temperature > 40 and gpu_stat.temperature < 90, "Invalid temperature readings. Temperature: %d" % gpu_stat.temperature)
self.assert_(gpu_stat.fan_speed > 0 and gpu_stat.fan_speed < 471, "Invalid fan speed readings. Fan Speed %f" % gpu_stat.fan_speed)

diag_stat = computer_hw.gpu_status_to_diag(gpu_stat)
diag_stat = NVidiaTempMonitor.gpu_status_to_diag(gpu_stat)

self.assert_(diag_stat.level == 0, "Diagnostics reports an error for nominal input. Message: %s" % diag_stat.message)

def test_high_temp_parse(self):
gpu_stat = computer_hw.parse_smi_output(self.high_temp_data)
gpu_stat = parse_smi_output(self.high_temp_data)

# Check valid
self.assert_(self.high_temp_data, "Unable to read sample output, no test to run")
Expand All @@ -93,17 +96,17 @@ def test_high_temp_parse(self):
self.assert_(gpu_stat.temperature > 90, "Invalid temperature readings. Temperature: %d" % gpu_stat.temperature)
self.assert_(gpu_stat.fan_speed > 0 and gpu_stat.fan_speed < 471, "Invalid fan speed readings. Fan Speed %s" % gpu_stat.fan_speed)

diag_stat = computer_hw.gpu_status_to_diag(gpu_stat)
diag_stat = NVidiaTempMonitor.gpu_status_to_diag(gpu_stat)

self.assert_(diag_stat.level == 1, "Diagnostics didn't report warning for high temp input. Level %d, Message: %s" % (diag_stat.level, diag_stat.message))


def test_empty_parse(self):
gpu_stat = computer_hw.parse_smi_output('')
gpu_stat = parse_smi_output('')

self.assert_(gpu_stat.temperature == 0, "Invalid temperature reading. Should be 0. Reading: %d" % gpu_stat.temperature)

diag_stat = computer_hw.gpu_status_to_diag(gpu_stat)
diag_stat = NVidiaTempMonitor.gpu_status_to_diag(gpu_stat)

self.assert_(diag_stat.level == 2, "Diagnostics didn't reports an error for empty input. Level: %d, Message: %s" % (diag_stat.level, diag_stat.message))

Expand Down

0 comments on commit ae82cde

Please sign in to comment.