From ef0568663bc05335a9b359b7458b046c1b233a3d Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 12 Feb 2026 10:08:29 -0600 Subject: [PATCH] updates to logging --- nodescraper/cli/cli.py | 55 +-------------- nodescraper/cli/helper.py | 47 +++++++++++++ nodescraper/models/taskresult.py | 70 +++++++++++++++++++ .../taskresulthooks/filesystemloghook.py | 41 +---------- test/unit/framework/test_cli.py | 20 +++--- test/unit/framework/test_cli_helper.py | 12 ++-- 6 files changed, 139 insertions(+), 106 deletions(-) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 94a1f0fb..1b15afec 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -45,6 +45,7 @@ log_system_info, parse_describe, parse_gen_plugin_config, + process_args, ) from nodescraper.cli.inputargtypes import ModelArgHandler, json_arg, log_path_arg from nodescraper.configregistry import ConfigRegistry @@ -306,60 +307,6 @@ def setup_logger(log_level: str = "INFO", log_path: Optional[str] = None) -> log return logger -def process_args( - raw_arg_input: list[str], plugin_names: list[str] -) -> tuple[list[str], dict[str, list[str]]]: - """separate top level args from plugin args - - Args: - raw_arg_input (list[str]): list of all arg input - plugin_names (list[str]): list of plugin names - - Returns: - tuple[list[str], dict[str, list[str]]]: tuple of top level args - and dict of plugin name to plugin args - """ - top_level_args = raw_arg_input - - try: - plugin_arg_index = raw_arg_input.index("run-plugins") - except ValueError: - plugin_arg_index = -1 - - plugin_arg_map = {} - invalid_plugins = [] - if plugin_arg_index != -1 and plugin_arg_index != len(raw_arg_input) - 1: - top_level_args = raw_arg_input[: plugin_arg_index + 1] - plugin_args = raw_arg_input[plugin_arg_index + 1 :] - - # handle help case - if plugin_args == ["-h"]: - top_level_args += plugin_args - else: - cur_plugin = None - for arg in plugin_args: - # Handle comma-separated plugin names (but not arguments) - if not arg.startswith("-") and "," in arg: - # Split comma-separated plugin names - for potential_plugin in arg.split(","): - potential_plugin = potential_plugin.strip() - if potential_plugin in plugin_names: - plugin_arg_map[potential_plugin] = [] - cur_plugin = potential_plugin - elif potential_plugin: - # Track invalid plugin names to log event later - invalid_plugins.append(potential_plugin) - elif arg in plugin_names: - plugin_arg_map[arg] = [] - cur_plugin = arg - elif cur_plugin: - plugin_arg_map[cur_plugin].append(arg) - elif not arg.startswith("-"): - # Track invalid plugin names to log event later - invalid_plugins.append(arg) - return (top_level_args, plugin_arg_map, invalid_plugins) - - def main(arg_input: Optional[list[str]] = None): """Main entry point for the CLI diff --git a/nodescraper/cli/helper.py b/nodescraper/cli/helper.py index 5cb97a73..dc3804b6 100644 --- a/nodescraper/cli/helper.py +++ b/nodescraper/cli/helper.py @@ -358,6 +358,53 @@ def generate_reference_config( return plugin_config +def process_args( + raw_arg_input: list[str], plugin_names: list[str] +) -> tuple[list[str], dict[str, list[str]], list[str]]: + """Separate top level args from plugin args. + + Args: + raw_arg_input: list of all arg input + plugin_names: list of plugin names + + Returns: + tuple of (top_level_args, plugin_arg_map, invalid_plugins) + """ + top_level_args = raw_arg_input + try: + plugin_arg_index = raw_arg_input.index("run-plugins") + except ValueError: + plugin_arg_index = -1 + + plugin_arg_map: dict[str, list[str]] = {} + invalid_plugins: list[str] = [] + if plugin_arg_index != -1 and plugin_arg_index != len(raw_arg_input) - 1: + top_level_args = raw_arg_input[: plugin_arg_index + 1] + plugin_args = raw_arg_input[plugin_arg_index + 1 :] + + if plugin_args == ["-h"]: + top_level_args += plugin_args + else: + cur_plugin = None + for arg in plugin_args: + if not arg.startswith("-") and "," in arg: + for potential_plugin in arg.split(","): + potential_plugin = potential_plugin.strip() + if potential_plugin in plugin_names: + plugin_arg_map[potential_plugin] = [] + cur_plugin = potential_plugin + elif potential_plugin: + invalid_plugins.append(potential_plugin) + elif arg in plugin_names: + plugin_arg_map[arg] = [] + cur_plugin = arg + elif cur_plugin: + plugin_arg_map[cur_plugin].append(arg) + elif not arg.startswith("-"): + invalid_plugins.append(arg) + return (top_level_args, plugin_arg_map, invalid_plugins) + + def generate_reference_config_from_logs( path: str, plugin_reg: PluginRegistry, logger: logging.Logger ) -> PluginConfig: diff --git a/nodescraper/models/taskresult.py b/nodescraper/models/taskresult.py index dd5990ef..d04618f4 100644 --- a/nodescraper/models/taskresult.py +++ b/nodescraper/models/taskresult.py @@ -24,12 +24,15 @@ # ############################################################################### import datetime +import json import logging +import os from typing import Any, Optional from pydantic import BaseModel, Field, field_serializer, field_validator from nodescraper.enums import EventPriority, ExecutionStatus +from nodescraper.utils import get_unique_filename, pascal_to_snake from .event import Event @@ -102,6 +105,73 @@ def duration(self) -> Optional[str]: return duration + @property + def source(self) -> str: + """Task/source name (alias for task for error-scraper compatibility).""" + return self.task or "" + + @property + def source_type(self) -> str: + """Task/source type (alias for parent for error-scraper compatibility).""" + return self.parent or "" + + @property + def summary_dict(self) -> dict: + """Summary dict for logging/display (task_name, task_type, task_result, event_count, duration).""" + return { + "task_name": self.source or self.parent or "", + "task_type": self.source_type or self.task or "", + "task_result": self.status.name if self.status else None, + "event_count": len(self.events), + "duration": self.duration, + } + + @property + def summary_str(self) -> str: + """Message string for display.""" + return self.message or "" + + def log_result(self, log_path: str) -> None: + """Write result, artifacts, and events to log_path. Events are merged into a single events.json.""" + from nodescraper.connection.inband import BaseFileArtifact + + os.makedirs(log_path, exist_ok=True) + + with open(os.path.join(log_path, "result.json"), "w", encoding="utf-8") as log_file: + log_file.write(self.model_dump_json(exclude={"artifacts", "events"}, indent=2)) + + artifact_map: dict[str, list[dict[str, Any]]] = {} + for artifact in self.artifacts: + if isinstance(artifact, BaseFileArtifact): + artifact.log_model(log_path) + else: + name = f"{pascal_to_snake(artifact.__class__.__name__)}s" + if name in artifact_map: + artifact_map[name].append(artifact.model_dump(mode="json")) + else: + artifact_map[name] = [artifact.model_dump(mode="json")] + + for name, artifacts in artifact_map.items(): + log_name = get_unique_filename(log_path, f"{name}.json") + with open(os.path.join(log_path, log_name), "w", encoding="utf-8") as log_file: + json.dump(artifacts, log_file, indent=2) + + if self.events: + event_log = os.path.join(log_path, "events.json") + new_events = [e.model_dump(mode="json", exclude_none=True) for e in self.events] + existing_events = [] + if os.path.isfile(event_log): + try: + with open(event_log, "r", encoding="utf-8") as f: + existing_events = json.load(f) + if not isinstance(existing_events, list): + existing_events = [] + except (json.JSONDecodeError, OSError): + existing_events = [] + all_events = existing_events + new_events + with open(event_log, "w", encoding="utf-8") as log_file: + json.dump(all_events, log_file, indent=2) + def _get_event_summary(self) -> str: """Get summary string for events diff --git a/nodescraper/taskresulthooks/filesystemloghook.py b/nodescraper/taskresulthooks/filesystemloghook.py index 34abb9f8..831e3fbe 100644 --- a/nodescraper/taskresulthooks/filesystemloghook.py +++ b/nodescraper/taskresulthooks/filesystemloghook.py @@ -23,14 +23,12 @@ # SOFTWARE. # ############################################################################### -import json import os from typing import Optional -from nodescraper.connection.inband import BaseFileArtifact from nodescraper.interfaces.taskresulthook import TaskResultHook from nodescraper.models import DataModel, TaskResult -from nodescraper.utils import get_unique_filename, pascal_to_snake +from nodescraper.utils import pascal_to_snake class FileSystemLogHook(TaskResultHook): @@ -42,47 +40,14 @@ def __init__(self, log_base_path=None, **kwargs) -> None: self.log_base_path = log_base_path def process_result(self, task_result: TaskResult, data: Optional[DataModel] = None, **kwargs): - """log task result to the filesystem - - Args: - task_result (TaskResult): input task result - """ + """Log task result to the filesystem (single events.json per directory).""" log_path = self.log_base_path if task_result.parent: log_path = os.path.join(log_path, pascal_to_snake(task_result.parent)) if task_result.task: log_path = os.path.join(log_path, pascal_to_snake(task_result.task)) - os.makedirs(log_path, exist_ok=True) - - with open(os.path.join(log_path, "result.json"), "w", encoding="utf-8") as log_file: - log_file.write(task_result.model_dump_json(exclude={"artifacts", "events"}, indent=2)) - - artifact_map = {} - for artifact in task_result.artifacts: - if isinstance(artifact, BaseFileArtifact): - log_name = get_unique_filename(log_path, artifact.filename) - artifact.log_model(log_path) - - else: - name = f"{pascal_to_snake(artifact.__class__.__name__)}s" - if name in artifact_map: - artifact_map[name].append(artifact.model_dump(mode="json")) - else: - artifact_map[name] = [artifact.model_dump(mode="json")] - - for name, artifacts in artifact_map.items(): - log_name = get_unique_filename(log_path, f"{name}.json") - with open(os.path.join(log_path, log_name), "w", encoding="utf-8") as log_file: - json.dump(artifacts, log_file, indent=2) - - if task_result.events: - event_log = get_unique_filename(log_path, "events.json") - events = [ - event.model_dump(mode="json", exclude_none=True) for event in task_result.events - ] - with open(os.path.join(log_path, event_log), "w", encoding="utf-8") as log_file: - json.dump(events, log_file, indent=2) + task_result.log_result(log_path) if data: data.log_model(log_path) diff --git a/test/unit/framework/test_cli.py b/test/unit/framework/test_cli.py index cd266ed9..014befa8 100644 --- a/test/unit/framework/test_cli.py +++ b/test/unit/framework/test_cli.py @@ -29,14 +29,16 @@ import pytest from pydantic import BaseModel -from nodescraper.cli import cli, inputargtypes +from nodescraper.cli import inputargtypes +from nodescraper.cli.helper import get_system_info, process_args +from nodescraper.cli.inputargtypes import ModelArgHandler, json_arg, log_path_arg from nodescraper.enums import SystemLocation from nodescraper.models import SystemInfo def test_log_path_arg(): - assert cli.log_path_arg("test") == "test" - assert cli.log_path_arg("none") is None + assert log_path_arg("test") == "test" + assert log_path_arg("none") is None @pytest.mark.parametrize( @@ -66,16 +68,16 @@ def test_dict_arg(): def test_json_arg(framework_fixtures_path): - assert cli.json_arg(os.path.join(framework_fixtures_path, "example.json")) == {"test": 123} + assert json_arg(os.path.join(framework_fixtures_path, "example.json")) == {"test": 123} with pytest.raises(argparse.ArgumentTypeError): - cli.json_arg(os.path.join(framework_fixtures_path, "invalid.json")) + json_arg(os.path.join(framework_fixtures_path, "invalid.json")) def test_model_arg(framework_fixtures_path): class TestArg(BaseModel): test: int - arg_handler = cli.ModelArgHandler(TestArg) + arg_handler = ModelArgHandler(TestArg) assert arg_handler.process_file_arg( os.path.join(framework_fixtures_path, "example.json") ) == TestArg(test=123) @@ -85,7 +87,7 @@ class TestArg(BaseModel): def test_system_info_builder(): - assert cli.get_system_info( + assert get_system_info( argparse.Namespace( sys_name="test_name", sys_sku="test_sku", @@ -98,7 +100,7 @@ def test_system_info_builder(): ) with pytest.raises(argparse.ArgumentTypeError): - cli.get_system_info( + get_system_info( argparse.Namespace( sys_name="test_name", sys_sku="test_sku", @@ -149,4 +151,4 @@ def test_system_info_builder(): ], ) def test_process_args(raw_arg_input, plugin_names, exp_output): - assert cli.process_args(raw_arg_input, plugin_names) == exp_output + assert process_args(raw_arg_input, plugin_names) == exp_output diff --git a/test/unit/framework/test_cli_helper.py b/test/unit/framework/test_cli_helper.py index 7d008e68..6c2c955f 100644 --- a/test/unit/framework/test_cli_helper.py +++ b/test/unit/framework/test_cli_helper.py @@ -37,13 +37,15 @@ from conftest import DummyDataModel from pydantic import BaseModel -from nodescraper.cli import cli from nodescraper.cli.helper import ( build_config, dump_results_to_csv, dump_to_csv, find_datamodel_and_result, + generate_reference_config, + generate_reference_config_from_logs, generate_summary, + get_plugin_configs, ) from nodescraper.configregistry import ConfigRegistry from nodescraper.enums import ExecutionStatus, SystemInteractionLevel @@ -71,14 +73,14 @@ def test_generate_reference_config(plugin_registry): ) ] - ref_config = cli.generate_reference_config(results, plugin_registry, logging.getLogger()) + ref_config = generate_reference_config(results, plugin_registry, logging.getLogger()) dump = ref_config.dict() assert dump["plugins"] == {"TestPluginA": {"analysis_args": {"model_attr": 17}}} def test_get_plugin_configs(): with pytest.raises(argparse.ArgumentTypeError): - cli.get_plugin_configs( + get_plugin_configs( system_interaction_level="INVALID", plugin_config_input=[], built_in_configs={}, @@ -86,7 +88,7 @@ def test_get_plugin_configs(): plugin_subparser_map={}, ) - plugin_configs = cli.get_plugin_configs( + plugin_configs = get_plugin_configs( system_interaction_level="PASSIVE", plugin_config_input=[], built_in_configs={}, @@ -180,7 +182,7 @@ def build_from_model(cls, datamodel): plugins={parent: SimpleNamespace(DATA_MODEL=FakeDataModel, ANALYZER_ARGS=FakeArgs)} ) - cfg = cli.generate_reference_config_from_logs(str(framework_fixtures_path), plugin_reg, logger) + cfg = generate_reference_config_from_logs(str(framework_fixtures_path), plugin_reg, logger) assert isinstance(cfg, PluginConfig) assert set(cfg.plugins) == {parent}