Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions nodescraper/plugins/inband/kernel/analyzer_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# SOFTWARE.
#
###############################################################################
from typing import Union
from typing import Optional, Union

from pydantic import Field, field_validator

Expand All @@ -33,6 +33,7 @@

class KernelAnalyzerArgs(AnalyzerArgs):
exp_kernel: Union[str, list] = Field(default_factory=list)
exp_numa: Optional[int] = None
regex_match: bool = False

@field_validator("exp_kernel", mode="before")
Expand Down Expand Up @@ -61,4 +62,7 @@ def build_from_model(cls, datamodel: KernelDataModel) -> "KernelAnalyzerArgs":
Returns:
KernelAnalyzerArgs: instance of analyzer args class
"""
return cls(exp_kernel=datamodel.kernel_version)
return cls(
exp_kernel=datamodel.kernel_version,
exp_numa=datamodel.numa_balancing,
)
55 changes: 39 additions & 16 deletions nodescraper/plugins/inband/kernel/kernel_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,21 @@ def analyze_data(
Returns:
TaskResult: Result of the analysis containing status and message.
"""
if not args:
correct_kernel_version = False
correct_numa_setting = False
# skip check if data not provided in config
if not args or not args.exp_kernel:
self.result.message = "Expected kernel not provided"
self.result.status = ExecutionStatus.NOT_RAN
return self.result

if (
args.exp_numa is None
or data.numa_balancing is None
or data.numa_balancing == args.exp_numa
):
correct_numa_setting = True

for kernel in args.exp_kernel:
if args.regex_match:
try:
Expand All @@ -69,21 +79,34 @@ def analyze_data(
)
continue
if regex_data.match(data.kernel_version):
self.result.message = "Kernel matches expected"
self.result.status = ExecutionStatus.OK
return self.result
correct_kernel_version = True
break
elif data.kernel_version == kernel:
self.result.message = "Kernel matches expected"
self.result.status = ExecutionStatus.OK
return self.result
correct_kernel_version = True
break

if not correct_kernel_version:
self.result.message = "unexpected kernel data!"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's leave the wording as it was before: "Kernel matches expected" and "Kernel mismatch"

self.result.status = ExecutionStatus.ERROR
self._log_event(
category=EventCategory.OS,
description="unexpected kernel version!",
data={"expected": args.exp_kernel, "actual": data.kernel_version},
priority=EventPriority.CRITICAL,
console_log=True,
)
elif not correct_numa_setting:
self.result.message = "unexpected kernel data!"
self.result.status = ExecutionStatus.ERROR
self._log_event(
category=EventCategory.OS,
description="unexpected numa_balancing setting!",
data={"expected": args.exp_numa, "actual": data.numa_balancing},
priority=EventPriority.CRITICAL,
console_log=True,
)
else:
self.result.message = "Kernel matches expected"
self.result.status = ExecutionStatus.OK

self.result.message = "Kernel mismatch!"
self.result.status = ExecutionStatus.ERROR
self._log_event(
category=EventCategory.OS,
description=f"Kernel mismatch! Expected: {args.exp_kernel}, actual: {data.kernel_version}",
data={"expected": args.exp_kernel, "actual": data.kernel_version},
priority=EventPriority.CRITICAL,
console_log=True,
)
return self.result
11 changes: 10 additions & 1 deletion nodescraper/plugins/inband/kernel/kernel_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class KernelCollector(InBandDataCollector[KernelDataModel, None]):
DATA_MODEL = KernelDataModel
CMD_WINDOWS = "wmic os get Version /Value"
CMD = "sh -c 'uname -a'"
CMD_NUMA_BALANCING = "sh -c 'cat /proc/sys/kernel/numa_balancing'"

def _parse_kernel_version(self, uname_a: str) -> Optional[str]:
"""Extract the kernel release from `uname -a` output.
Expand Down Expand Up @@ -77,6 +78,7 @@ def collect_data(

kernel = None
kernel_info = None
numa_balancing = None

if self.system_info.os_family == OSFamily.WINDOWS:
res = self._run_sut_cmd(self.CMD_WINDOWS)
Expand All @@ -90,6 +92,9 @@ def collect_data(
if res.exit_code == 0:
kernel_info = res.stdout
kernel = self._parse_kernel_version(kernel_info)
numa_res = self._run_sut_cmd(self.CMD_NUMA_BALANCING)
if numa_res.exit_code == 0 and numa_res.stdout.strip().isdigit():
numa_balancing = int(numa_res.stdout.strip())
if not kernel:
self._log_event(
category=EventCategory.OS,
Expand All @@ -110,7 +115,11 @@ def collect_data(

if kernel_info and kernel:

kernel_data = KernelDataModel(kernel_info=kernel_info, kernel_version=kernel)
kernel_data = KernelDataModel(
kernel_info=kernel_info,
kernel_version=kernel,
numa_balancing=numa_balancing,
)
self._log_event(
category="KERNEL_READ",
description="Kernel version read",
Expand Down
3 changes: 3 additions & 0 deletions nodescraper/plugins/inband/kernel/kerneldata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@
#
###############################################################################

from typing import Optional

from nodescraper.models import DataModel


class KernelDataModel(DataModel):
    """Kernel details produced by KernelCollector and consumed by KernelAnalyzer."""

    # Raw stdout of the kernel query command (`uname -a` on the non-Windows path)
    kernel_info: str
    # Kernel version parsed from the command output; compared against exp_kernel
    kernel_version: str
    # Integer read from /proc/sys/kernel/numa_balancing; None when the read
    # failed or the output was not a plain digit string
    numa_balancing: Optional[int] = None
1 change: 1 addition & 0 deletions test/functional/fixtures/kernel_plugin_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"KernelPlugin": {
"analysis_args": {
"exp_kernel": "5.11-generic",
"exp_numa": 0,
"regex_match": false
}
}
Expand Down
85 changes: 72 additions & 13 deletions test/unit/plugin/test_kernel_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def model_obj():
return KernelDataModel(
kernel_info="Linux MockSystem 5.13.0-30-generic #1 XYZ Day Month 10 15:19:13 EDT 2024 x86_64 x86_64 x86_64 GNU/Linux",
kernel_version="5.13.0-30-generic",
numa_balancing=0,
)


Expand All @@ -54,7 +55,7 @@ def config():


def test_all_good_data(system_info, model_obj, config):
args = KernelAnalyzerArgs(exp_kernel=config["kernel_name"])
args = KernelAnalyzerArgs(exp_kernel=config["kernel_name"], exp_numa=0)
analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args)

Expand All @@ -64,7 +65,7 @@ def test_all_good_data(system_info, model_obj, config):


def test_all_good_data_strings(system_info, model_obj, config):
args = KernelAnalyzerArgs(exp_kernel=config["kernel_name"][0])
args = KernelAnalyzerArgs(exp_kernel=config["kernel_name"][0], exp_numa=0)
analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args)

Expand All @@ -85,62 +86,68 @@ def test_no_config_data(system_info, model_obj):


def test_invalid_kernel(system_info, model_obj, config):
args = KernelAnalyzerArgs(exp_kernel=config["kernel_name"])
args = KernelAnalyzerArgs(exp_kernel=config["kernel_name"], exp_numa=0)
model_obj.kernel_version = "some_invalid"

analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args=args)

assert result.status == ExecutionStatus.ERROR
assert "Kernel mismatch" in result.message
assert "unexpected kernel data!" in result.message
assert any(
event.priority == EventPriority.CRITICAL and event.category == EventCategory.OS.value
event.priority == EventPriority.CRITICAL
and event.category == EventCategory.OS.value
and "unexpected kernel version!" in event.description
for event in result.events
)


def test_unexpected_kernel(system_info, model_obj):
args = KernelAnalyzerArgs(exp_kernel=["5.18.2-mi300-build"])
args = KernelAnalyzerArgs(exp_kernel=["5.18.2-mi300-build"], exp_numa=0)
analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args)

assert result.status == ExecutionStatus.ERROR
assert "Kernel mismatch!" in result.message
assert "unexpected kernel data!" in result.message
assert any(
event.priority == EventPriority.CRITICAL and event.category == EventCategory.OS.value
for event in result.events
)


def test_invalid_kernel_config(system_info, model_obj, config):
args = KernelAnalyzerArgs(exp_kernel=config["invalid"])
args = KernelAnalyzerArgs(exp_kernel=config["invalid"], exp_numa=0)
analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args)

assert result.status == ExecutionStatus.ERROR


def test_match_regex(system_info, model_obj):
args = KernelAnalyzerArgs(exp_kernel=[r".*5\.13\.\d+-\d+-[\w-]+.*"], regex_match=True)
args = KernelAnalyzerArgs(
exp_kernel=[r".*5\.13\.\d+-\d+-[\w-]+.*"], regex_match=True, exp_numa=0
)
analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args)
assert result.status == ExecutionStatus.OK


def test_mismatch_regex(system_info, model_obj):
args = KernelAnalyzerArgs(exp_kernel=[r".*4\.13\.\d+-\d+-[\w-]+.*"], regex_match=True)
args = KernelAnalyzerArgs(
exp_kernel=[r".*4\.13\.\d+-\d+-[\w-]+.*"], regex_match=True, exp_numa=0
)
analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args)

assert result.status == ExecutionStatus.ERROR
assert len(result.events) == 1
assert result.events[0].priority == EventPriority.CRITICAL
assert result.events[0].category == EventCategory.OS.value
assert "Kernel mismatch!" in result.events[0].description
assert "unexpected kernel version!" in result.events[0].description


def test_bad_regex(system_info, model_obj):
args = KernelAnalyzerArgs(exp_kernel=[r"4.[3.\d-\d+-[\w]+"], regex_match=True)
args = KernelAnalyzerArgs(exp_kernel=[r"4.[3.\d-\d+-[\w]+"], regex_match=True, exp_numa=0)
analyzer = KernelAnalyzer(system_info)
result = analyzer.analyze_data(model_obj, args)

Expand All @@ -151,4 +158,56 @@ def test_bad_regex(system_info, model_obj):
assert result.events[0].description == "Kernel regex is invalid"
assert result.events[1].priority == EventPriority.CRITICAL
assert result.events[1].category == EventCategory.OS.value
assert "Kernel mismatch!" in result.events[1].description
assert "unexpected kernel version!" in result.events[1].description


def test_unexpected_numa(system_info, model_obj, config):
    """An exp_numa that differs from the collected numa_balancing value must fail analysis."""
    expected_kernel = config["kernel_name"][0]
    analyzer_args = KernelAnalyzerArgs(exp_kernel=expected_kernel, exp_numa=1)
    outcome = KernelAnalyzer(system_info).analyze_data(model_obj, analyzer_args)

    assert outcome.status == ExecutionStatus.ERROR
    assert "unexpected kernel data!" in outcome.message

    # At least one critical OS event has to call out the NUMA mismatch.
    numa_events = [
        evt
        for evt in outcome.events
        if evt.priority == EventPriority.CRITICAL
        and evt.category == EventCategory.OS.value
        and "unexpected numa_balancing setting!" in evt.description
    ]
    assert numa_events


def test_no_expected_numa(system_info, model_obj, config):
    """Leaving exp_numa as None skips the NUMA comparison, so a matching kernel is OK."""
    analyzer_args = KernelAnalyzerArgs(
        exp_kernel=config["kernel_name"][0],
        exp_numa=None,
    )
    outcome = KernelAnalyzer(system_info).analyze_data(model_obj, analyzer_args)

    assert outcome.status == ExecutionStatus.OK
    assert "Kernel matches expected" in outcome.message

    # No event may be raised at warning level or above.
    elevated = [EventPriority.WARNING, EventPriority.ERROR, EventPriority.CRITICAL]
    assert not any(evt.priority in elevated for evt in outcome.events)


def test_no_numa_balancing(system_info, model_obj, config):
    """Data without a numa_balancing value passes the NUMA check even when exp_numa is set."""
    expected_kernel = config["kernel_name"][0]
    collected = KernelDataModel(
        kernel_info=model_obj.kernel_info,
        kernel_version=expected_kernel,
        numa_balancing=None,
    )
    analyzer_args = KernelAnalyzerArgs(exp_kernel=expected_kernel, exp_numa=0)
    outcome = KernelAnalyzer(system_info).analyze_data(collected, analyzer_args)

    assert outcome.status == ExecutionStatus.OK
    assert "Kernel matches expected" in outcome.message

    # No event may be raised at warning level or above.
    elevated = [EventPriority.WARNING, EventPriority.ERROR, EventPriority.CRITICAL]
    assert not any(evt.priority in elevated for evt in outcome.events)
Loading