Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions diagnostic_common_diagnostics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ It publishes the usage percentage in a diagnostic message.
* Name of the node is "cpu_monitor_" + hostname.
* Uses the following args:
* warning_percentage: If the CPU usage is > warning_percentage, a WARN status will be publised.
* error_percentage: If the CPU usage is > error_percentage, an ERROR status will be published.
Comment on lines 16 to +17
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please also make the this more precise in terms of average vs single cpu

* window: the maximum length of the used collections.deque for queuing CPU readings.

### Published Topics
Expand Down Expand Up @@ -97,6 +98,7 @@ It publishes the usage percentage in a diagnostic message.
* Name of the node is "ram_monitor_" + hostname.
* Uses the following args:
* warning_percentage: If the RAM usage is > warning_percentage, a WARN status will be published.
* error_percentage: If the RAM usage is > error_percentage, an ERROR status will be published.
* window: the maximum length of the used collections.deque for queuing RAM readings.

### Published Topics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,11 @@

class CpuTask(DiagnosticTask):

def __init__(self, warning_percentage=90, window=1):
def __init__(self, warning_percentage=90, error_percentage=100, window=1):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def __init__(self, warning_percentage=90, error_percentage=100, window=1):
def __init__(self, warning_percentage, error_percentage, window):

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this way its closer to the ram implementation and there are not too many place where defaults are set.

DiagnosticTask.__init__(self, 'CPU Information')

self._warning_percentage = int(warning_percentage)
self._error_percentage = int(error_percentage)
self._readings = deque(maxlen=window)

def _get_average_reading(self):
Expand All @@ -71,15 +72,15 @@ def run(self, stat):

stat.add('CPU Load Average', f'{cpu_average:.2f}')

warn = False
for idx, cpu_percentage in enumerate(cpu_percentages):
stat.add(f'CPU {idx} Load', f'{cpu_percentage:.2f}')
if cpu_percentage > self._warning_percentage:
warn = True

if warn:
if cpu_average > self._error_percentage:
stat.summary(DiagnosticStatus.ERROR,
f'CPU Average exceeds {self._error_percentage} percent')
elif cpu_average > self._warning_percentage:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes the behavior, because previously this warning was done per individual cpu. This is not bad per se. But the default behaviour should stay as it was previously

stat.summary(DiagnosticStatus.WARN,
f'At least one CPU exceeds {self._warning_percentage} percent')
f'CPU Average exceeds {self._warning_percentage} percent')
else:
stat.summary(DiagnosticStatus.OK,
f'CPU Average {cpu_average:.2f} percent')
Expand All @@ -100,16 +101,20 @@ def main(args=None):

# Declare and get parameters
node.declare_parameter('warning_percentage', 90)
node.declare_parameter('error_percentage', 100)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100 is a weird default value. I suggest 95

node.declare_parameter('window', 1)

warning_percentage = node.get_parameter(
'warning_percentage').get_parameter_value().integer_value
error_percentage = node.get_parameter(
'error_percentage').get_parameter_value().integer_value
window = node.get_parameter('window').get_parameter_value().integer_value

# Create diagnostic updater with default updater rate of 1 hz
updater = Updater(node)
updater.setHardwareID(hostname)
updater.add(CpuTask(warning_percentage=warning_percentage, window=window))
updater.add(CpuTask(warning_percentage=warning_percentage, error_percentage=error_percentage,
window=window))

rclpy.spin(node)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@

class RamTask(DiagnosticTask):

def __init__(self, warning_percentage, window):
def __init__(self, warning_percentage, error_percentage, window):
DiagnosticTask.__init__(self, 'RAM Information')
self._warning_percentage = int(warning_percentage)
self._error_percentage = int(error_percentage)
self._readings = collections.deque(maxlen=window)

def run(self, stat):
Expand All @@ -59,7 +60,12 @@ def run(self, stat):

stat.add('RAM Load Average', f'{ram_average:.2f}')

if ram_average > self._warning_percentage:
if ram_average > self._error_percentage:
stat.summary(
DiagnosticStatus.ERROR,
f'RAM Average exceeds {self._error_percentage:d} percent',
)
elif ram_average > self._warning_percentage:
stat.summary(
DiagnosticStatus.WARN,
f'RAM Average exceeds {self._warning_percentage:d} percent',
Expand All @@ -84,6 +90,7 @@ def main():
updater.add(
RamTask(
node.declare_parameter('warning_percentage', 90).value,
node.declare_parameter('error_percentage', 100).value,
node.declare_parameter('window', 1).value,
)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_warn(self):
print(f'Raw readings: {task._readings}')
self.assertEqual(task.name, 'CPU Information')
self.assertEqual(stat.level, DiagnosticStatus.WARN)
self.assertIn(str('At least one CPU exceeds'), stat.message)
self.assertIn(str('CPU Average exceeds'), stat.message)

# Check for at least 1 CPU Load Average and 1 CPU Load
self.assertGreaterEqual(len(stat.values), 2)
Expand Down