Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/detectmatelibrary/common/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def to_dict(self, method_id: str) -> Dict[str, Any]:
# Collect all non-meta fields for params
params = {}
events_data = None
instances_data = None

for field_name, field_value in self:
# Skip meta fields
Expand All @@ -72,6 +73,13 @@ def to_dict(self, method_id: str) -> Dict[str, Any]:
events_data = field_value.to_dict()
else:
events_data = field_value
# Handle global instances specially (top-level, not in params)
# Serialized as "global" in YAML (Python field is "global_instances")
elif field_name == "global_instances" and field_value:
instances_data = {
name: inst.to_dict()
for name, inst in field_value.items()
}
else:
# All other fields go into params
params[field_name] = field_value
Expand All @@ -80,6 +88,10 @@ def to_dict(self, method_id: str) -> Dict[str, Any]:
if params:
result["params"] = params

# Add global instances if they exist (serialized as "global" in YAML)
if instances_data is not None:
result["global"] = instances_data

# Add events if they exist
if events_data is not None:
result["events"] = events_data
Expand Down
17 changes: 13 additions & 4 deletions src/detectmatelibrary/common/_config/_compile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from detectmatelibrary.common._config._formats import EventsConfig
from detectmatelibrary.common._config._formats import EventsConfig, _EventInstance

from typing import Any, Dict, List, Sequence, Tuple, Union
import warnings
Expand Down Expand Up @@ -93,8 +93,9 @@ def check_type(config: Dict[str, Any], method_type: str) -> None:
def process(config: Dict[str, Any]) -> Dict[str, Any]:
has_params = "params" in config
has_events = "events" in config
has_instances = "global" in config

if not has_params and not has_events and not config.get("auto_config", False):
if not has_params and not has_events and not has_instances and not config.get("auto_config", False):
warnings.warn(MissingParamsWarning())

if has_params:
Expand All @@ -108,11 +109,19 @@ def process(config: Dict[str, Any]) -> Dict[str, Any]:
if has_events:
config["events"] = EventsConfig._init(config["events"])

# Handle "global" key: event-ID-independent global instances
# Renamed to "global_instances" to avoid collision with Python keyword
if has_instances:
config["global_instances"] = {
name: _EventInstance._init(**data)
for name, data in config.pop("global").items()
}

return config


def generate_detector_config(
variable_selection: Dict[int, List[Union[str, Tuple[str, ...]]]],
variable_selection: Dict[int | str, List[Union[str, Tuple[str, ...]]]],
detector_name: str,
method_type: str,
**additional_params: Any
Expand Down Expand Up @@ -158,7 +167,7 @@ def generate_detector_config(
"""
var_pattern = re.compile(r"^var_(\d+)$")

events_config: Dict[int, Dict[str, Any]] = {}
events_config: Dict[int | str, Dict[str, Any]] = {}

for event_id, variable_names in variable_selection.items():
instances: Dict[str, Any] = {}
Expand Down
59 changes: 55 additions & 4 deletions src/detectmatelibrary/common/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,25 @@ def describe(self) -> str:
return descriptions[self.value]


class ConfigState(Enum):
DEFAULT = 0
STOP_CONFIGURE = 1
KEEP_CONFIGURE = 2

def describe(self) -> str:
descriptions = [
"Follow default configuration behavior.",
"Force stop configuration.",
"Keep configuring regardless of default behavior."
]

return descriptions[self.value]


class CoreConfig(BasicConfig):
start_id: int = 10
data_use_training: int | None = None
data_use_configure: int | None = None


def do_training(config: CoreConfig, index: int, train_state: TrainState) -> bool:
Expand All @@ -67,6 +83,15 @@ def do_training(config: CoreConfig, index: int, train_state: TrainState) -> bool
return config.data_use_training is not None and config.data_use_training > index


def do_configure(config: CoreConfig, index: int, configure_state: ConfigState) -> bool:
if configure_state == ConfigState.STOP_CONFIGURE:
return False
elif configure_state == ConfigState.KEEP_CONFIGURE:
return True

return config.data_use_configure is not None and config.data_use_configure > index


class CoreComponent:
"""Base class for all components in the system."""
def __init__(
Expand All @@ -86,6 +111,9 @@ def __init__(
self.id_generator = SimpleIDGenerator(self.config.start_id)
self.data_used_train = 0
self.train_state: TrainState = TrainState.DEFAULT
self.data_used_configure = 0
self.configure_state: ConfigState = ConfigState.DEFAULT
self._configuration_done = False

def __repr__(self) -> str:
return f"<{self.type_}> {self.name}: {self.config}"
Expand All @@ -100,17 +128,40 @@ def train(
) -> None:
pass

def configure(
self, input_: List[BaseSchema] | BaseSchema,
) -> None:
pass

def set_configuration(self) -> None:
pass

def process(self, data: BaseSchema | bytes) -> BaseSchema | bytes | None:
is_byte, data = SchemaPipeline.preprocess(self.input_schema(), data)
logger.debug(f"<<{self.name}>> received:\n{data}")

if (data_buffered := self.data_buffer.add(data)) is None: # type: ignore
return None

if do_training(config=self.config, index=self.data_used_train, train_state=self.train_state):
self.data_used_train += 1
logger.info(f"<<{self.name}>> use data for training")
self.train(input_=data_buffered)
if do_configure(
config=self.config,
index=self.data_used_configure,
configure_state=self.configure_state
):
self.data_used_configure += 1
logger.info(f"<<{self.name}>> use data for configuration")
self.configure(input_=data_buffered)
return None
else:
if self.data_used_configure > 0 and not self._configuration_done:
self._configuration_done = True
logger.info(f"<<{self.name}>> finalizing configuration")
self.set_configuration()

if do_training(config=self.config, index=self.data_used_train, train_state=self.train_state):
self.data_used_train += 1
logger.info(f"<<{self.name}>> use data for training")
self.train(input_=data_buffered)

output_ = self.output_schema()
logger.info(f"<<{self.name}>> processing data")
Expand Down
33 changes: 32 additions & 1 deletion src/detectmatelibrary/common/detector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from detectmatelibrary.common._config._formats import EventsConfig
from detectmatelibrary.common._config._formats import EventsConfig, _EventInstance
from detectmatelibrary.common.core import CoreComponent, CoreConfig

from detectmatelibrary.utils.data_buffer import ArgsBuffer, BufferMode
Expand Down Expand Up @@ -68,6 +68,27 @@ def get_configured_variables(
return result


def get_global_variables(
input_: ParserSchema,
global_instances: Dict[str, _EventInstance],
) -> Dict[str, Any]:
"""Extract header variables from event-ID-independent instances.

Args:
input_: Parser schema containing logFormatVariables
global_instances: Dict of instance_name -> _EventInstance configs

Returns:
Dict mapping variable names to their values from the input
"""
result: Dict[str, Any] = {}
for instance in global_instances.values():
for name in instance.header_variables:
if name in input_["logFormatVariables"]:
result[name] = input_["logFormatVariables"][name]
return result


class CoreDetectorConfig(CoreConfig):
comp_type: str = "detectors"
method_type: str = "core_detector"
Expand Down Expand Up @@ -125,3 +146,13 @@ def train(
self, input_: ParserSchema | list[ParserSchema] # type: ignore
) -> None:
pass

@override
def configure(
self, input_: ParserSchema | list[ParserSchema] # type: ignore
) -> None:
pass

@override
def set_configuration(self) -> None:
pass
1 change: 1 addition & 0 deletions src/detectmatelibrary/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
TIMESTAMP = "Time"
EVENT_TEMPLATE = "EventTemplate"
EVENT_ID = "EventId"
GLOBAL_EVENT_ID = "*"
33 changes: 30 additions & 3 deletions src/detectmatelibrary/detectors/new_value_combo_detector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from detectmatelibrary.common._config import generate_detector_config
from detectmatelibrary.common._config._formats import EventsConfig
from detectmatelibrary.common._config._formats import EventsConfig, _EventInstance

from detectmatelibrary.common.detector import CoreDetectorConfig, CoreDetector, get_configured_variables
from detectmatelibrary.common.detector import (
CoreDetectorConfig,
CoreDetector,
get_configured_variables,
get_global_variables
)

from detectmatelibrary.utils.data_buffer import BufferMode
from detectmatelibrary.utils.persistency.event_data_structures.trackers import (
Expand All @@ -10,6 +15,7 @@
from detectmatelibrary.utils.persistency.event_persistency import EventPersistency

from detectmatelibrary.schemas import ParserSchema, DetectorSchema
from detectmatelibrary.constants import GLOBAL_EVENT_ID

from typing import Any, Dict, Sequence, cast, Tuple
from itertools import combinations
Expand Down Expand Up @@ -48,6 +54,7 @@ class NewValueComboDetectorConfig(CoreDetectorConfig):
method_type: str = "new_value_combo_detector"

events: EventsConfig | dict[str, Any] = {}
global_instances: Dict[str, _EventInstance] = {}
comb_size: int = 2


Expand Down Expand Up @@ -85,6 +92,14 @@ def train(self, input_: ParserSchema) -> None: # type: ignore
event_template=input_["template"],
named_variables=configured_variables
)
if config.global_instances:
global_vars = get_global_variables(input_, config.global_instances)
if global_vars:
self.persistency.ingest_event(
event_id=GLOBAL_EVENT_ID,
event_template=input_["template"],
named_variables=global_vars
)

def detect(
self, input_: ParserSchema, output_: DetectorSchema # type: ignore
Expand All @@ -110,6 +125,18 @@ def detect(
)
overall_score += 1.0

if config.global_instances and GLOBAL_EVENT_ID in known_events:
global_vars = get_global_variables(input_, config.global_instances)
global_combo_dict = get_combo(global_vars)
global_tracker = known_events[GLOBAL_EVENT_ID]
for combo_key, multi_tracker in global_tracker.get_data().items():
value_tuple = global_combo_dict.get(combo_key)
if value_tuple is None:
continue
if value_tuple not in multi_tracker.unique_set:
alerts[f"Global - {combo_key}"] = f"Unknown value combination: {value_tuple}"
overall_score += 1.0

if overall_score > 0:
output_["score"] = overall_score
output_["description"] = (
Expand All @@ -120,7 +147,7 @@ def detect(
return True
return False

def configure(self, input_: ParserSchema) -> None:
def configure(self, input_: ParserSchema) -> None: # type: ignore
"""Configure the detector based on the stability of individual
variables, then learn value combinations based on that
configuration."""
Expand Down
35 changes: 30 additions & 5 deletions src/detectmatelibrary/detectors/new_value_detector.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
from detectmatelibrary.common._config._compile import generate_detector_config
from detectmatelibrary.common._config._formats import EventsConfig

from detectmatelibrary.common.detector import CoreDetectorConfig, CoreDetector, get_configured_variables
from detectmatelibrary.common._config._formats import EventsConfig, _EventInstance

from detectmatelibrary.common.detector import (
CoreDetectorConfig,
CoreDetector,
get_configured_variables,
get_global_variables
)
from detectmatelibrary.utils.persistency.event_data_structures.trackers.stability.stability_tracker import (
EventStabilityTracker
)
from detectmatelibrary.utils.persistency.event_persistency import EventPersistency
from detectmatelibrary.utils.data_buffer import BufferMode

from detectmatelibrary.schemas import ParserSchema, DetectorSchema
from detectmatelibrary.constants import GLOBAL_EVENT_ID

from typing import Any
from typing import Any, Dict


class NewValueDetectorConfig(CoreDetectorConfig):
method_type: str = "new_value_detector"

events: EventsConfig | dict[str, Any] = {}
global_instances: Dict[str, _EventInstance] = {}


class NewValueDetector(CoreDetector):
Expand Down Expand Up @@ -50,6 +56,14 @@ def train(self, input_: ParserSchema) -> None: # type: ignore
event_template=input_["template"],
named_variables=configured_variables
)
if self.config.global_instances:
global_vars = get_global_variables(input_, self.config.global_instances)
if global_vars:
self.persistency.ingest_event(
event_id=GLOBAL_EVENT_ID,
event_template=input_["template"],
named_variables=global_vars
)

def detect(
self, input_: ParserSchema, output_: DetectorSchema # type: ignore
Expand All @@ -74,6 +88,17 @@ def detect(
)
overall_score += 1.0

if self.config.global_instances and GLOBAL_EVENT_ID in known_events:
global_vars = get_global_variables(input_, self.config.global_instances)
global_tracker = known_events[GLOBAL_EVENT_ID]
for var_name, multi_tracker in global_tracker.get_data().items():
value = global_vars.get(var_name)
if value is None:
continue
if value not in multi_tracker.unique_set:
alerts[f"Global - {var_name}"] = f"Unknown value: '{value}'"
overall_score += 1.0

if overall_score > 0:
output_["score"] = overall_score
output_["description"] = f"{self.name} detects values not encountered in training as anomalies."
Expand All @@ -82,7 +107,7 @@ def detect(

return False

def configure(self, input_: ParserSchema) -> None:
def configure(self, input_: ParserSchema) -> None: # type: ignore
self.auto_conf_persistency.ingest_event(
event_id=input_["EventID"],
event_template=input_["template"],
Expand Down
Loading