diff --git a/src/detectmatelibrary/common/_config/__init__.py b/src/detectmatelibrary/common/_config/__init__.py index 297201c..a21d7ae 100644 --- a/src/detectmatelibrary/common/_config/__init__.py +++ b/src/detectmatelibrary/common/_config/__init__.py @@ -59,6 +59,7 @@ def to_dict(self, method_id: str) -> Dict[str, Any]: # Collect all non-meta fields for params params = {} events_data = None + instances_data = None for field_name, field_value in self: # Skip meta fields @@ -72,6 +73,13 @@ def to_dict(self, method_id: str) -> Dict[str, Any]: events_data = field_value.to_dict() else: events_data = field_value + # Handle global instances specially (top-level, not in params) + # Serialized as "global" in YAML (Python field is "global_instances") + elif field_name == "global_instances" and field_value: + instances_data = { + name: inst.to_dict() + for name, inst in field_value.items() + } else: # All other fields go into params params[field_name] = field_value @@ -80,6 +88,10 @@ def to_dict(self, method_id: str) -> Dict[str, Any]: if params: result["params"] = params + # Add global instances if they exist (serialized as "global" in YAML) + if instances_data is not None: + result["global"] = instances_data + # Add events if they exist if events_data is not None: result["events"] = events_data diff --git a/src/detectmatelibrary/common/_config/_compile.py b/src/detectmatelibrary/common/_config/_compile.py index d61ce8d..5198629 100644 --- a/src/detectmatelibrary/common/_config/_compile.py +++ b/src/detectmatelibrary/common/_config/_compile.py @@ -1,4 +1,4 @@ -from detectmatelibrary.common._config._formats import EventsConfig +from detectmatelibrary.common._config._formats import EventsConfig, _EventInstance from typing import Any, Dict, List, Sequence, Tuple, Union import warnings @@ -93,8 +93,9 @@ def check_type(config: Dict[str, Any], method_type: str) -> None: def process(config: Dict[str, Any]) -> Dict[str, Any]: has_params = "params" in config has_events = "events" 
class ConfigState(Enum):
    """Override switch for the configuration phase of a component.

    The integer values are part of the contract: ``describe`` looks the
    explanatory text up by ``self.value``.
    """

    DEFAULT = 0
    STOP_CONFIGURE = 1
    KEEP_CONFIGURE = 2

    def describe(self) -> str:
        """Return a one-sentence, human-readable explanation of this state."""
        # Kept local to the method: a class-level mapping inside an Enum body
        # would itself be turned into an enum member.
        text_by_value = {
            0: "Follow default configuration behavior.",
            1: "Force stop configuration.",
            2: "Keep configuring regardless of default behavior.",
        }
        return text_by_value[self.value]
def get_global_variables(
    input_: ParserSchema,
    global_instances: Dict[str, _EventInstance],
) -> Dict[str, Any]:
    """Extract header variables from event-ID-independent instances.

    Args:
        input_: Parser schema containing logFormatVariables
        global_instances: Dict of instance_name -> _EventInstance configs

    Returns:
        Dict mapping variable names to their values from the input. Names
        listed by an instance but absent from the input are left out. When
        several instances list the same name the value is simply written
        again, so the result holds each name once.
    """
    result: Dict[str, Any] = {}
    if not global_instances:
        # Nothing configured: do not touch the input at all.
        return result

    # Resolve the variable mapping once; it does not change while we iterate,
    # so there is no reason to look it up again for every variable name.
    log_variables = input_["logFormatVariables"]
    for instance in global_instances.values():
        for name in instance.header_variables:
            if name in log_variables:
                result[name] = log_variables[name]
    return result
detectmatelibrary.utils.persistency.event_data_structures.trackers import ( @@ -10,6 +15,7 @@ from detectmatelibrary.utils.persistency.event_persistency import EventPersistency from detectmatelibrary.schemas import ParserSchema, DetectorSchema +from detectmatelibrary.constants import GLOBAL_EVENT_ID from typing import Any, Dict, Sequence, cast, Tuple from itertools import combinations @@ -48,6 +54,7 @@ class NewValueComboDetectorConfig(CoreDetectorConfig): method_type: str = "new_value_combo_detector" events: EventsConfig | dict[str, Any] = {} + global_instances: Dict[str, _EventInstance] = {} comb_size: int = 2 @@ -85,6 +92,14 @@ def train(self, input_: ParserSchema) -> None: # type: ignore event_template=input_["template"], named_variables=configured_variables ) + if config.global_instances: + global_vars = get_global_variables(input_, config.global_instances) + if global_vars: + self.persistency.ingest_event( + event_id=GLOBAL_EVENT_ID, + event_template=input_["template"], + named_variables=global_vars + ) def detect( self, input_: ParserSchema, output_: DetectorSchema # type: ignore @@ -110,6 +125,18 @@ def detect( ) overall_score += 1.0 + if config.global_instances and GLOBAL_EVENT_ID in known_events: + global_vars = get_global_variables(input_, config.global_instances) + global_combo_dict = get_combo(global_vars) + global_tracker = known_events[GLOBAL_EVENT_ID] + for combo_key, multi_tracker in global_tracker.get_data().items(): + value_tuple = global_combo_dict.get(combo_key) + if value_tuple is None: + continue + if value_tuple not in multi_tracker.unique_set: + alerts[f"Global - {combo_key}"] = f"Unknown value combination: {value_tuple}" + overall_score += 1.0 + if overall_score > 0: output_["score"] = overall_score output_["description"] = ( @@ -120,7 +147,7 @@ def detect( return True return False - def configure(self, input_: ParserSchema) -> None: + def configure(self, input_: ParserSchema) -> None: # type: ignore """Configure the detector based on the 
stability of individual variables, then learn value combinations based on that configuration.""" diff --git a/src/detectmatelibrary/detectors/new_value_detector.py b/src/detectmatelibrary/detectors/new_value_detector.py index dad5ecd..eab00c6 100644 --- a/src/detectmatelibrary/detectors/new_value_detector.py +++ b/src/detectmatelibrary/detectors/new_value_detector.py @@ -1,8 +1,12 @@ from detectmatelibrary.common._config._compile import generate_detector_config -from detectmatelibrary.common._config._formats import EventsConfig - -from detectmatelibrary.common.detector import CoreDetectorConfig, CoreDetector, get_configured_variables +from detectmatelibrary.common._config._formats import EventsConfig, _EventInstance +from detectmatelibrary.common.detector import ( + CoreDetectorConfig, + CoreDetector, + get_configured_variables, + get_global_variables +) from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import ( EventStabilityTracker ) @@ -10,14 +14,16 @@ from detectmatelibrary.utils.data_buffer import BufferMode from detectmatelibrary.schemas import ParserSchema, DetectorSchema +from detectmatelibrary.constants import GLOBAL_EVENT_ID -from typing import Any +from typing import Any, Dict class NewValueDetectorConfig(CoreDetectorConfig): method_type: str = "new_value_detector" events: EventsConfig | dict[str, Any] = {} + global_instances: Dict[str, _EventInstance] = {} class NewValueDetector(CoreDetector): @@ -50,6 +56,14 @@ def train(self, input_: ParserSchema) -> None: # type: ignore event_template=input_["template"], named_variables=configured_variables ) + if self.config.global_instances: + global_vars = get_global_variables(input_, self.config.global_instances) + if global_vars: + self.persistency.ingest_event( + event_id=GLOBAL_EVENT_ID, + event_template=input_["template"], + named_variables=global_vars + ) def detect( self, input_: ParserSchema, output_: DetectorSchema # type: ignore @@ -74,6 +88,17 @@ def 
def get_event_data(self, event_id: int | str) -> Any | None:
    """Look up the stored data for one event ID.

    Returns the result of the matching data structure's ``get_data()``,
    or ``None`` when nothing has been stored under ``event_id``.
    """
    structure = self.events_data.get(event_id)
    if structure is None:
        return None
    return structure.get_data()
def test_true_roundtrip_preservation(self): # The two dicts should be identical assert dict1 == dict2 + def test_global_instance_roundtrip(self): + """Test that a detector config with a global instance round-trips + correctly.""" + config_yaml = load_test_config() + method_id = "detector_global_instance" + + # Load from YAML + config = MockupDetectorConfig.from_dict(config_yaml, method_id) + + # global_instances must be populated + assert "global_monitor" in config.global_instances + instance = config.global_instances["global_monitor"] + assert "Level" in instance.header_variables + assert "Time" in instance.header_variables + + # Convert back to dict + result_dict = config.to_dict(method_id) + result = result_dict["detectors"][method_id] + + # Serialised as "global" key + assert "global" in result + assert "global_monitor" in result["global"] + assert "header_variables" in result["global"]["global_monitor"] + hv_positions = [hv["pos"] for hv in result["global"]["global_monitor"]["header_variables"]] + assert "Level" in hv_positions + assert "Time" in hv_positions + + # True round-trip: yaml -> pydantic -> yaml -> pydantic + config2 = MockupDetectorConfig.from_dict(result_dict, method_id) + dict2 = config2.to_dict(method_id) + + assert config.global_instances.keys() == config2.global_instances.keys() + assert result_dict == dict2 + def test_parser_true_roundtrip(self): """Test parser yaml -> pydantic -> yaml -> pydantic roundtrip.""" config_yaml = load_test_config() diff --git a/tests/test_common/test_core.py b/tests/test_common/test_core.py index 018412b..a7b5220 100644 --- a/tests/test_common/test_core.py +++ b/tests/test_common/test_core.py @@ -1,4 +1,4 @@ -from detectmatelibrary.common.core import CoreConfig, CoreComponent, TrainState +from detectmatelibrary.common.core import CoreConfig, CoreComponent, TrainState, ConfigState from detectmatelibrary.common._config import BasicConfig from detectmatelibrary.utils.data_buffer import ArgsBuffer @@ -27,7 +27,8 @@ 
class MockComponentWithConfigure(CoreComponent):
    """Test double that records how the configuration hooks are driven.

    Every input handed to ``configure`` is kept in ``configure_data`` and
    each ``set_configuration`` call bumps ``set_configuration_called``, so
    tests can assert on both after feeding data through the component.
    """

    def __init__(
        self, name: str, config: MockConfigWithConfigure = MockConfigWithConfigure()
    ) -> None:
        super().__init__(
            name=name,
            type_="Dummy",
            config=config,
            input_schema=schemas.LogSchema,
        )
        # Inputs seen by configure(), in arrival order.
        self.configure_data: list = []
        # Number of times set_configuration() has been invoked.
        self.set_configuration_called: int = 0

    def run(self, input_, output_) -> bool:
        """Never produce an output; only the configuration hooks matter here."""
        return False

    def set_configuration(self) -> None:
        """Count the finalisation call."""
        self.set_configuration_called = self.set_configuration_called + 1

    def configure(self, input_) -> None:
        """Remember the input that was used for configuration."""
        self.configure_data.append(input_)
"test_hostname" + }) + + def test_configuration(self) -> None: + component = MockComponentWithConfigure(name="DummyCfg1") + + results = [component.process(self._make_log(i)) for i in range(10)] + + assert component.data_used_configure == 3 + assert len(component.configure_data) == 3 + assert all(r is None for r in results[:3]) + assert component.set_configuration_called == 1 + + def test_configuration_returns_none_during_configure(self) -> None: + component = MockComponentWithConfigure(name="DummyCfg2") + + results = [component.process(self._make_log(i)) for i in range(3)] + + assert all(r is None for r in results) + + def test_configuration_force_stop(self) -> None: + component = MockComponentWithConfigure(name="DummyCfg3") + component.configure_state = ConfigState.STOP_CONFIGURE + + for i in range(10): + component.process(self._make_log(i)) + + assert len(component.configure_data) == 0 + assert component.set_configuration_called == 0 + + def test_configuration_keep_configure(self) -> None: + component = MockComponentWithConfigure(name="DummyCfg4") + component.configure_state = ConfigState.KEEP_CONFIGURE + + for i in range(10): + component.process(self._make_log(i)) + + assert len(component.configure_data) == 10 + assert component.set_configuration_called == 0 + + def test_configuration_before_training(self) -> None: + config = CoreConfig(data_use_configure=2, data_use_training=3) + component = MockComponentWithConfigureAndTraining(name="DummyCfg5", config=config) + + for i in range(10): + component.process(self._make_log(i)) + + assert len(component.configure_data) == 2 + assert len(component.train_data) == 3 + assert component.set_configuration_called == 1 + + def test_set_configuration_called_once(self) -> None: + component = MockComponentWithConfigure(name="DummyCfg6") + + for i in range(component.config.data_use_configure + 5): # type: ignore[operator] + component.process(self._make_log(i)) + + assert component.set_configuration_called == 1 diff --git 
a/tests/test_detectors/test_new_value_detector.py b/tests/test_detectors/test_new_value_detector.py index cf2ed70..f3bdef9 100644 --- a/tests/test_detectors/test_new_value_detector.py +++ b/tests/test_detectors/test_new_value_detector.py @@ -8,7 +8,11 @@ - Input/output schema validation """ -from detectmatelibrary.detectors.new_value_detector import NewValueDetector, BufferMode +from detectmatelibrary.detectors.new_value_detector import ( + NewValueDetector, NewValueDetectorConfig, BufferMode +) +from detectmatelibrary.common.core import ConfigState, TrainState +from detectmatelibrary.constants import GLOBAL_EVENT_ID from detectmatelibrary.parsers.template_matcher import MatcherParser from detectmatelibrary.helper.from_to import From import detectmatelibrary.schemas as schemas @@ -233,3 +237,77 @@ def test_audit_log_anomalies(self): detected_ids.add(log["logID"]) assert detected_ids == {'1859', '1860', '1861', '1862', '1864', '1865', '1866', '1867'} + + +class TestNewValueDetectorAutoConfig: + """Test that process() drives configure/set_configuration/train/detect + automatically.""" + + def test_audit_log_anomalies_via_process(self): + parser = MatcherParser(config=_PARSER_CONFIG) + detector = NewValueDetector() + + logs = list(From.log(parser, in_path="tests/test_folder/audit.log", do_process=True)) + + # Phase 1: configure — keep configuring for logs[:1800] + detector.configure_state = ConfigState.KEEP_CONFIGURE + for log in logs[:1800]: + detector.process(log) + + # Transition: stop configure so next process() call triggers set_configuration() + detector.configure_state = ConfigState.STOP_CONFIGURE + + # Phase 2: train — keep training for logs[:1800] + detector.train_state = TrainState.KEEP_TRAINING + for log in logs[:1800]: + detector.process(log) + + # Phase 3: detect — stop training so process() only calls detect() + detector.train_state = TrainState.STOP_TRAINING + detected_ids: set[str] = set() + for log in logs[1800:]: + if detector.process(log) is not 
class TestNewValueDetectorGlobalInstances:
    """Tests event-ID-independent global instance detection."""

    def test_global_instance_detects_new_type(self):
        """A global instance watching ``Type`` must raise alerts after training.

        The detector is trained on the first 1800 parsed audit log entries and
        evaluated on the rest; per the original test description, the types
        CRED_REFR, USER_AUTH and USER_CMD only show up in that later part.
        """
        parser = MatcherParser(config=_PARSER_CONFIG)

        global_section = {"test": {"header_variables": [{"pos": "Type"}]}}
        detector_section = {
            "method_type": "new_value_detector",
            "auto_config": False,
            "global": global_section,
        }
        config = NewValueDetectorConfig.from_dict(
            {"detectors": {"NewValueDetector": detector_section}},
            "NewValueDetector",
        )
        detector = NewValueDetector(config=config)

        logs = list(From.log(parser, in_path="tests/test_folder/audit.log", do_process=True))
        training_logs, evaluation_logs = logs[:1800], logs[1800:]

        for entry in training_logs:
            detector.train(entry)

        # Global tracker must be populated under the sentinel event ID
        assert GLOBAL_EVENT_ID in detector.persistency.get_events_data()

        flagged_ids: set[str] = set()
        for entry in evaluation_logs:
            result = schemas.DetectorSchema()
            if not detector.detect(entry, output_=result):
                continue
            # No per-event config is present, so every alert must be a global one.
            assert all(key.startswith("Global -") for key in result["alertsObtain"])
            flagged_ids.add(entry["logID"])

        assert len(flagged_ids) > 0
a/tests/test_pipelines/test_configuration_engine.py b/tests/test_pipelines/test_configuration_engine.py new file mode 100644 index 0000000..493829a --- /dev/null +++ b/tests/test_pipelines/test_configuration_engine.py @@ -0,0 +1,89 @@ +from detectmatelibrary.detectors.new_value_detector import NewValueDetector, NewValueDetectorConfig +from detectmatelibrary.parsers.template_matcher import MatcherParser +from detectmatelibrary.helper.from_to import From + +import json + +AUDIT_LOG = "tests/test_folder/audit.log" +AUDIT_TEMPLATES = "tests/test_folder/audit_templates.txt" +ANOMALY_LABELS = "tests/test_folder/audit_anomaly_labels.log" +LOG_FORMAT = "type= msg=audit(