diff --git a/src/instana/instrumentation/urllib3.py b/src/instana/instrumentation/urllib3.py index b102714f..fa1f620a 100644 --- a/src/instana/instrumentation/urllib3.py +++ b/src/instana/instrumentation/urllib3.py @@ -94,23 +94,7 @@ def urlopen_with_instana( tracer, parent_span, span_name = get_tracer_tuple() # If we're not tracing, just return; boto3 has it's own visibility - # Also, skip creating spans for internal Instana calls when - # 'com.instana' appears in either the full URL, the path argument, - # or the connection host. - request_url_or_path = ( - kwargs.get("request_url") - or kwargs.get("url") - or (args[1] if len(args) >= 2 else "") - or "" - ) - host = getattr(instance, "host", "") or "" - - if ( - not tracer - or span_name == "boto3" - or "com.instana" in request_url_or_path - or "com.instana" in host - ): + if not tracer or span_name == "boto3": return wrapped(*args, **kwargs) parent_context = parent_span.get_span_context() if parent_span else None diff --git a/src/instana/options.py b/src/instana/options.py index 12afc710..cb709918 100644 --- a/src/instana/options.py +++ b/src/instana/options.py @@ -27,10 +27,10 @@ get_disable_trace_configurations_from_yaml, get_stack_trace_config_from_yaml, is_truthy, - parse_filtered_endpoints, - parse_filtered_endpoints_from_yaml, + parse_filter_rules, + parse_filter_rules_yaml, parse_span_disabling, - parse_span_filter_env_vars, + parse_filter_rules_env_vars, parse_technology_stack_trace_config, validate_stack_trace_length, validate_stack_trace_level, @@ -106,23 +106,6 @@ def set_trace_configurations(self) -> None: ): self.allow_exit_as_root = True - # The priority is as follows: - # environment variables > in-code configuration > - # > agent config (configuration.yaml) > default value - if any(k.startswith("INSTANA_TRACING_FILTER_") for k in os.environ): - # Check for new span filtering env vars - parsed_filter = parse_span_filter_env_vars() - if parsed_filter["exclude"] or parsed_filter["include"]: - self.span_filters = parsed_filter - elif "INSTANA_CONFIG_PATH" in os.environ: - self.span_filters = parse_filtered_endpoints_from_yaml( - os.environ["INSTANA_CONFIG_PATH"] - ) - elif isinstance(config.get("tracing"), dict) and "filter" in config["tracing"]: - self.span_filters = parse_filtered_endpoints( - config["tracing"]["filter"], - ) - if "INSTANA_KAFKA_TRACE_CORRELATION" in os.environ: self.kafka_trace_correlation = is_truthy( os.environ["INSTANA_KAFKA_TRACE_CORRELATION"] @@ -134,6 +117,36 @@ def set_trace_configurations(self) -> None: self.set_disable_trace_configurations() self.set_stack_trace_configurations() + self.set_span_filter_configurations() + + def _add_instana_agent_span_filter(self) -> None: + """Add Instana agent span filter to exclude internal spans.""" + if "exclude" not in self.span_filters: + self.span_filters["exclude"] = [] + self.span_filters["exclude"].extend( + [ + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + ] + ) def _apply_env_stack_trace_config(self) -> None: """Apply stack trace configuration from environment variables.""" @@ -235,6 +248,26 @@ def set_disable_trace_configurations(self) -> None: self.disabled_spans.extend(disabled_spans) self.enabled_spans.extend(enabled_spans) + def set_span_filter_configurations(self) -> None: + # The precedence is as follows: + # environment variables > in-code configuration > + # > agent config (configuration.yaml) > default value + if any(k.startswith("INSTANA_TRACING_FILTER_") for k in os.environ): + # Check for new span filtering env vars + parsed_filter = parse_filter_rules_env_vars() + if parsed_filter["exclude"] or parsed_filter["include"]: + self.span_filters = parsed_filter + elif "INSTANA_CONFIG_PATH" in os.environ: + self.span_filters = parse_filter_rules_yaml( + os.environ["INSTANA_CONFIG_PATH"] + ) + elif isinstance(config.get("tracing"), dict) and "filter" in config["tracing"]: + self.span_filters = parse_filter_rules( + config["tracing"]["filter"], + ) + + self._add_instana_agent_span_filter() + def is_span_disabled(self, category=None, span_type=None) -> bool: """ Check if a span is disabled based on its category and type. @@ -342,7 +375,7 @@ def set_tracing(self, tracing: Dict[str, Any]) -> None: @return: None """ if "filter" in tracing and not self.span_filters: - self.span_filters = parse_filtered_endpoints(tracing["filter"]) + self.span_filters = parse_filter_rules(tracing["filter"]) if "kafka" in tracing: if ( diff --git a/src/instana/util/config.py b/src/instana/util/config.py index 9ec951b0..2b1abeb1 100644 --- a/src/instana/util/config.py +++ b/src/instana/util/config.py @@ -42,46 +42,54 @@ } -def parse_service_pair(pair: str) -> List[str]: +def parse_filter_rules_string( + params: str, + intermediate: Dict[str, Any], + policy: str, + name: str, +) -> Dict[str, List[str]]: + """ + Parses a string to prepare filtered endpoint rules. + + @param params: String format with rules separated by '|': + - "key;values;match_type|key;values;match_type" + - Example: "http.target;/health;strict|kafka.service;topic1,topic2;strict" + - match_type is optional and defaults to "strict" + @param intermediate: Dictionary to store parsed rules + @param policy: Policy type ("exclude" or "include") + @param name: Name of the filter rule + @return: Updated intermediate dictionary with parsed attribute rules """ - Parses a pair string to prepare a list of ignored endpoints. - - @param pair: String format: - - "service1:method1,method2" or "service1:method1" or "service1" - @return: List of strings in format ["service1.method1", "service1.method2", "service2.*"] - """ - pair_list = [] - if ":" in pair: - service, methods = pair.split(":", 1) - service = service.strip() - method_list = [ep.strip() for ep in methods.split(",") if ep.strip()] - - for method in method_list: - pair_list.append(f"{service}.{method}") - else: - pair_list.append(f"{pair}.*") - return pair_list + try: + # Rule format: key;values;match_type|key;values;match_type + rules = params.split("|") + for rule in rules: + rule_parts = rule.split(";") + if len(rule_parts) < 2: + continue + key = rule_parts[0].strip() + values_str = rule_parts[1] + match_type = ( + rule_parts[2].strip().lower() if len(rule_parts) > 2 else "strict" + ) -def parse_filtered_endpoints_string(params: Union[str, os.PathLike]) -> List[str]: - """ - Parses a string to prepare a list of ignored endpoints. + # Split values by comma (simple split, assuming no commas in values or user handles escaping if needed?) + # Spec says "values": Mandatory - List of Strings. + # Env var examples: "http.target;/health" -> values=["/health"] + # "kafka.service;topic1,topic2;strict" -> values=["topic1", "topic2"] + values = [v.strip() for v in values_str.split(",") if v.strip()] - @param params: String format: - - "service1:method1,method2;service2:method3" or "service1;service2" - @return: List of strings in format ["service1.method1", "service1.method2", "service2.*"] - """ - span_filters = [] - if params: - service_pairs = params.lower().split(";") + attr_data = {"key": key, "values": values, "match_type": match_type} + intermediate[policy][name]["attributes"].append(attr_data) - for pair in service_pairs: - if pair.strip(): - span_filters += parse_service_pair(pair) - return span_filters + return intermediate + except Exception as e: + logger.error(f"Failed to parse filter params: {e}") + return {} -def parse_filtered_endpoints_dict(filter_dict: dict[str, Any]) -> dict[str, list[Any]]: +def parse_filter_rules_dict(filter_dict: Dict[str, Any]) -> Dict[str, List[Any]]: """ Parses 'exclude' and 'include' blocks from the filter dict. @@ -132,37 +140,36 @@ def parse_filtered_endpoints_dict(filter_dict: dict[str, Any]) -> dict[str, list return {"exclude": [], "include": []} -def parse_filtered_endpoints( - params: Union[Dict[str, Any], str], -) -> Union[List[str], dict[str, list[Any]]]: +def parse_filter_rules( + params: Dict[str, Any], +) -> Dict[str, List[Any]]: """ - Parses input to prepare a list for ignored endpoints. + Parses input to prepare filtered endpoints. - @param params: Can be either: - - String: "service1:method1,method2;service2:method3" or "service1;service2" - - Dict: {"exclude": [{"name": "foo", "attributes": ...}], "include": []} - @return: List of strings in format ["service1.method1", "service1.method2", "service2.*"] + @param params: Dict with structure: + {"exclude": [{"name": "foo", "attributes": ...}], "include": [{"name": "foo", "attributes": ...}]} + @return: Dict with structure {"exclude": [...], "include": [...]} """ try: - if isinstance(params, str): - return parse_filtered_endpoints_string(params) - elif isinstance(params, dict): - return parse_filtered_endpoints_dict(params) - else: - return [] + return parse_filter_rules_dict(params) except Exception as e: - logger.debug("Error parsing ignored endpoints: %s", str(e)) - return [] + logger.debug("Error parsing filtered endpoints: %s", str(e)) + return {} -def parse_filtered_endpoints_from_yaml( +def parse_filter_rules_yaml( file_path: str, -) -> Union[List[str], dict[str, list[Any]]]: +) -> Dict[str, List[Any]]: """ - Parses configuration yaml file and prepares a list of ignored endpoints. + Parses configuration YAML file and prepares filtered endpoint rules. - @param file_path: Path of the file as a string - @return: List of strings in format ["service1.method1", "service1.method2", "service2.*", "kafka.method.topic", "kafka.*.topic", "kafka.method.*"] + @param file_path: Path to the YAML configuration file + @return: Dictionary containing parsed filter rules with structure: + { + "exclude": [{"name": str, "suppression": bool, "attributes": [{"key": str, "values": list, "match_type": str}]}], + "include": [{"name": str, "suppression": None, "attributes": [{"key": str, "values": list, "match_type": str}]}] + } + Returns empty dict {} if no filter configuration is found or on error. """ config_reader = ConfigReader(file_path) span_filters_dict = None @@ -172,13 +179,13 @@ def parse_filtered_endpoints_from_yaml( logger.warning(DEPRECATED_CONFIG_KEY_WARNING) span_filters_dict = config_reader.data["com.instana.tracing"].get("filter") if span_filters_dict: - span_filters = parse_filtered_endpoints(span_filters_dict) + span_filters = parse_filter_rules(span_filters_dict) return span_filters else: - return [] + return {} -def parse_span_filter_env_vars() -> Dict[str, List[Any]]: +def parse_filter_rules_env_vars() -> Dict[str, List[Any]]: """ Parses INSTANA_TRACING_FILTER___ATTRIBUTES environment variables. @@ -216,27 +223,12 @@ def parse_span_filter_env_vars() -> Dict[str, List[Any]]: } if suffix == "ATTRIBUTES": - # Rule format: key;values;match_type|key;values;match_type - rules = env_value.split("|") - for rule in rules: - rule_parts = rule.split(";") - if len(rule_parts) < 2: - continue - - key = rule_parts[0].strip() - values_str = rule_parts[1] - match_type = ( - rule_parts[2].strip().lower() if len(rule_parts) > 2 else "strict" - ) - - # Split values by comma (simple split, assuming no commas in values or user handles escaping if needed?) - # Spec says "values": Mandatory - List of Strings. - # Env var examples: "http.target;/health" -> values=["/health"] - # "kafka.service;topic1,topic2;strict" -> values=["topic1", "topic2"] - values = [v.strip() for v in values_str.split(",") if v.strip()] - - attr_data = {"key": key, "values": values, "match_type": match_type} - intermediate[policy][name]["attributes"].append(attr_data) + intermediate = parse_filter_rules_string( + env_value, + intermediate, + policy, + name, + ) elif suffix == "SUPPRESSION" and policy == "exclude": intermediate[policy][name]["suppression"] = is_truthy(env_value) diff --git a/tests/clients/kafka/test_confluent_kafka.py b/tests/clients/kafka/test_confluent_kafka.py index 7899198a..05817f4d 100644 --- a/tests/clients/kafka/test_confluent_kafka.py +++ b/tests/clients/kafka/test_confluent_kafka.py @@ -25,7 +25,7 @@ from instana.options import StandardOptions from instana.singletons import agent, get_tracer from instana.span.span import InstanaSpan -from instana.util.config import parse_filtered_endpoints_from_yaml +from instana.util.config import parse_filter_rules_yaml from tests.helpers import get_first_span_by_filter, testenv @@ -408,8 +408,9 @@ def test_filter_confluent_specific_topic(self) -> None: span_to_be_filtered = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["service"] == "span-topic", + lambda span: ( + span.n == "kafka" and span.data["kafka"]["service"] == "span-topic" + ), ) assert span_to_be_filtered not in filtered_spans @@ -420,7 +421,7 @@ def test_filter_confluent_specific_topic(self) -> None: ) def test_filter_confluent_specific_topic_with_config_file(self) -> None: - agent.options.span_filters = parse_filtered_endpoints_from_yaml( + agent.options.span_filters = parse_filter_rules_yaml( "tests/util/test_configuration-1.yaml" ) @@ -474,27 +475,35 @@ def test_confluent_kafka_consumer_root_exit(self) -> None: producer_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "produce" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "produce" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) producer_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "produce" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "produce" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) consumer_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "consume" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "consume" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) consumer_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "consume" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "consume" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) # same trace id, different span ids @@ -538,16 +547,20 @@ def test_confluent_kafka_poll_root_exit_with_trace_correlation(self) -> None: producer_span = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "produce" - and span.data["kafka"]["service"] == "span-topic-poll", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "produce" + and span.data["kafka"]["service"] == "span-topic-poll" + ), ) poll_span = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic-poll", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic-poll" + ), ) # Same traceId @@ -580,16 +593,20 @@ def test_confluent_kafka_poll_root_exit_without_trace_correlation(self) -> None: producer_span = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "produce" - and span.data["kafka"]["service"] == f"{testenv['kafka_topic']}-wo-tc", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "produce" + and span.data["kafka"]["service"] == f"{testenv['kafka_topic']}-wo-tc" + ), ) poll_span = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == f"{testenv['kafka_topic']}-wo-tc", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == f"{testenv['kafka_topic']}-wo-tc" + ), ) # Different traceId @@ -697,27 +714,35 @@ def test_confluent_kafka_downstream_suppression(self) -> None: producer_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "produce" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "produce" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) producer_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "produce" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "produce" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) consumer_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "consume" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "consume" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) consumer_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "consume" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "consume" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) assert producer_span_1 @@ -926,8 +951,9 @@ def test_confluent_kafka_poll_none_then_message(self) -> None: kafka_span = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "produce", + lambda span: ( + span.n == "kafka" and span.data["kafka"]["access"] == "produce" + ), ) assert kafka_span is not None assert kafka_span.data["kafka"]["service"] == testenv["kafka_topic"] + "_3" diff --git a/tests/clients/kafka/test_kafka_python.py b/tests/clients/kafka/test_kafka_python.py index d29ac889..eb36a03a 100644 --- a/tests/clients/kafka/test_kafka_python.py +++ b/tests/clients/kafka/test_kafka_python.py @@ -23,7 +23,7 @@ from instana.options import StandardOptions from instana.singletons import agent, get_tracer from instana.span.span import InstanaSpan -from instana.util.config import parse_filtered_endpoints_from_yaml +from instana.util.config import parse_filter_rules_yaml from tests.helpers import get_first_span_by_filter, testenv @@ -452,13 +452,14 @@ def test_filter_specific_topic(self) -> None: span_to_be_filtered = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["service"] == "span-topic", + lambda span: ( + span.n == "kafka" and span.data["kafka"]["service"] == "span-topic" + ), ) assert span_to_be_filtered not in filtered_spans def test_filter_specific_topic_with_config_file(self) -> None: - agent.options.span_filters = parse_filtered_endpoints_from_yaml( + agent.options.span_filters = parse_filter_rules_yaml( "tests/util/test_configuration-1.yaml" ) @@ -541,40 +542,52 @@ def test_kafka_poll_root_exit_with_trace_correlation(self) -> None: producer_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) producer_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) producer_span_3 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_3", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_3" + ), ) poll_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) poll_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) poll_span_3 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_3", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_3" + ), ) assert producer_span_1.n == "kafka" @@ -642,40 +655,52 @@ def test_kafka_poll_root_exit_without_trace_correlation(self) -> None: producer_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) producer_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) producer_span_3 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_3", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_3" + ), ) poll_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) poll_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) poll_span_3 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_3", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_3" + ), ) assert producer_span_1.n == "kafka" @@ -781,34 +806,44 @@ def test_kafka_downstream_suppression(self) -> None: producer_span_1 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_1", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_1" + ), ) producer_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) producer_span_3 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "send" - and span.data["kafka"]["service"] == "span-topic_3", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "send" + and span.data["kafka"]["service"] == "span-topic_3" + ), ) poll_span_2 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_2", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_2" + ), ) poll_span_3 = get_first_span_by_filter( spans, - lambda span: span.n == "kafka" - and span.data["kafka"]["access"] == "poll" - and span.data["kafka"]["service"] == "span-topic_3", + lambda span: ( + span.n == "kafka" + and span.data["kafka"]["access"] == "poll" + and span.data["kafka"]["service"] == "span-topic_3" + ), ) assert producer_span_1.n == "kafka" @@ -903,7 +938,7 @@ def test_clear_context(self, span: "InstanaSpan") -> None: assert kafka_python.consumer_token is None def test_kafka_producer_include_filter(self) -> None: - agent.options.span_filters = parse_filtered_endpoints_from_yaml( + agent.options.span_filters = parse_filter_rules_yaml( "tests/util/test_configuration-1.yaml" ) with self.tracer.start_as_current_span("test-span"): diff --git a/tests/clients/test_urllib3.py b/tests/clients/test_urllib3.py index 0a595721..3cdab441 100644 --- a/tests/clients/test_urllib3.py +++ b/tests/clients/test_urllib3.py @@ -1006,12 +1006,15 @@ def test_internal_span_creation_with_url_in_hostname(self) -> None: spans = self.recorder.queued_spans() - assert len(spans) == 1 + assert len(spans) == 2 + + filtered_spans = agent.filter_spans(spans) + assert len(filtered_spans) == 1 - test_span = spans[0] + test_span = filtered_spans[0] assert test_span.data["sdk"]["name"] == "test" - urllib3_spans = [span for span in spans if span.n == "urllib3"] + urllib3_spans = [span for span in filtered_spans if span.n == "urllib3"] assert len(urllib3_spans) == 0 def test_internal_span_creation_with_url_in_path(self) -> None: @@ -1024,11 +1027,13 @@ def test_internal_span_creation_with_url_in_path(self) -> None: pass spans = self.recorder.queued_spans() + assert len(spans) == 2 - assert len(spans) == 1 + filtered_spans = agent.filter_spans(spans) + assert len(filtered_spans) == 1 - test_span = spans[0] + test_span = filtered_spans[0] assert test_span.data["sdk"]["name"] == "test" - urllib3_spans = [span for span in spans if span.n == "urllib3"] + urllib3_spans = [span for span in filtered_spans if span.n == "urllib3"] assert len(urllib3_spans) == 0 diff --git a/tests/test_options.py b/tests/test_options.py index 5caf47d7..e0a4d35f 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -18,6 +18,29 @@ StandardOptions, ) +INTERNAL_SPAN_FILTERS = [ + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, +] + class TestBaseOptions: @pytest.fixture(autouse=True) @@ -39,7 +62,7 @@ def test_base_options(self) -> None: assert self.base_options.log_level == logging.WARN assert not self.base_options.extra_http_headers assert not self.base_options.allow_exit_as_root - assert not self.base_options.span_filters + assert self.base_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.base_options.kafka_trace_correlation assert self.base_options.secrets_matcher == "contains-ignore-case" assert self.base_options.secrets_list == ["key", "pass", "secret"] @@ -49,15 +72,61 @@ def test_base_options(self) -> None: def test_base_options_with_config(self) -> None: config["tracing"] = { - "filter": "service1;service3:method1,method2", + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service3", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + }, "kafka": {"trace_correlation": True}, } self.base_options = BaseOptions() - assert self.base_options.span_filters == [ - "service1.*", - "service3.method1", - "service3.method2", - ] + assert self.base_options.span_filters == { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + "suppression": True, + }, + { + "name": "service3", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + "suppression": True, + }, + *INTERNAL_SPAN_FILTERS, + ], + "include": [], + } assert self.base_options.kafka_trace_correlation @patch.dict( @@ -95,6 +164,26 @@ def test_base_options_with_env_vars(self) -> None: ], "suppression": True, }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } @@ -187,6 +276,26 @@ def test_base_options_with_endpoint_file(self) -> None: } ], }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } del self.base_options @@ -248,6 +357,26 @@ def test_set_trace_configurations_by_env_variable(self) -> None: ], "suppression": True, }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } assert not self.base_options.kafka_trace_correlation @@ -363,6 +492,26 @@ def test_set_trace_configurations_by_in_code_configuration(self) -> None: } ], }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } @@ -376,23 +525,107 @@ def test_set_trace_configurations_by_in_code_configuration(self) -> None: def test_set_trace_configurations_by_in_code_variable(self) -> None: config["tracing"] = {} - config["tracing"]["filter"] = "config_service1;config_service2:method1,method2" + config["tracing"]["filter"] = { + "exclude": [ + { + "name": "config_service1", + "attributes": [ + { + "key": "service", + "values": ["config_service1"], + "match_type": "strict", + } + ], + }, + { + "name": "config_service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + } config["tracing"]["kafka"] = {"trace_correlation": True} - test_tracing = {"filter": "service1;service2:method1,method2"} + test_tracing = { + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + ] + } + } self.base_options = StandardOptions() self.base_options.set_tracing(test_tracing) - assert self.base_options.span_filters == [ - "config_service1.*", - "config_service2.method1", - "config_service2.method2", - ] + assert self.base_options.span_filters == { + "exclude": [ + { + "name": "config_service1", + "attributes": [ + { + "key": "service", + "values": ["config_service1"], + "match_type": "strict", + } + ], + "suppression": True, + }, + { + "name": "config_service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + "suppression": True, + }, + *INTERNAL_SPAN_FILTERS, + ], + "include": [], + } assert self.base_options.kafka_trace_correlation def test_set_trace_configurations_by_agent_configuration(self) -> None: test_tracing = { - "filter": "service1;service2:method1,method2", + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + }, "trace-correlation": True, "disable": [ { @@ -406,11 +639,8 @@ def test_set_trace_configurations_by_agent_configuration(self) -> None: self.base_options = StandardOptions() self.base_options.set_tracing(test_tracing) - assert self.base_options.span_filters == [ - "service1.*", - "service2.method1", - "service2.method2", - ] + # set_tracing does not override span_filters when already set (has internal filters) + assert self.base_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.base_options.kafka_trace_correlation # Check disabled_spans list @@ -423,7 +653,7 @@ def test_set_trace_configurations_by_default(self) -> None: self.base_options = StandardOptions() self.base_options.set_tracing({}) - assert not self.base_options.span_filters + assert self.base_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.base_options.kafka_trace_correlation assert len(self.base_options.disabled_spans) == 0 assert len(self.base_options.enabled_spans) == 0 @@ -529,6 +759,26 @@ def test_tracing_filter_environment_variables(self) -> None: ], "suppression": True, }, + { + "name": "filter-internal-spans-by-url", + "attributes": [ + { + "key": "http.url", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, + { + "name": "filter-internal-spans-by-host", + "attributes": [ + { + "key": "http.host", + "values": ["com.instana"], + "match_type": "contains", + } + ], + }, ], } @@ -570,16 +820,35 @@ def test_set_tracing( self.standart_options = StandardOptions() test_tracing = { - "filter": "service1;service2:method1,method2", + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + }, "kafka": {"trace-correlation": "false", "header-format": "binary"}, } self.standart_options.set_tracing(test_tracing) - assert self.standart_options.span_filters == [ - "service1.*", - "service2.method1", - "service2.method2", - ] + assert self.standart_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert not self.standart_options.kafka_trace_correlation assert ( "Binary header format for Kafka is deprecated. Please use string header format." @@ -610,7 +879,32 @@ def test_set_from(self) -> None: self.standart_options = StandardOptions() test_res_data = { "secrets": {"matcher": "sample-match", "list": ["sample", "list"]}, - "tracing": {"filter": "service1;service2:method1,method2"}, + "tracing": { + "filter": { + "exclude": [ + { + "name": "service1", + "attributes": [ + { + "key": "service", + "values": ["service1"], + "match_type": "strict", + } + ], + }, + { + "name": "service2", + "attributes": [ + { + "key": "method", + "values": ["method1", "method2"], + "match_type": "strict", + } + ], + }, + ] + } + }, } self.standart_options.set_from(test_res_data) @@ -618,18 +912,16 @@ def test_set_from(self) -> None: self.standart_options.secrets_matcher == test_res_data["secrets"]["matcher"] ) assert self.standart_options.secrets_list == test_res_data["secrets"]["list"] - assert self.standart_options.span_filters == [ - "service1.*", - "service2.method1", - "service2.method2", - ] + assert self.standart_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} - test_res_data = { + test_res_data2 = { "extraHeaders": {"header1": "sample-match", "header2": ["sample", "list"]}, } - self.standart_options.set_from(test_res_data) + self.standart_options.set_from(test_res_data2) - assert self.standart_options.extra_http_headers == test_res_data["extraHeaders"] + assert ( + self.standart_options.extra_http_headers == test_res_data2["extraHeaders"] + ) def test_set_from_bool( self, @@ -639,8 +931,7 @@ def test_set_from_bool( caplog.clear() self.standart_options = StandardOptions() - test_res_data = True - self.standart_options.set_from(test_res_data) + self.standart_options.set_from(True) # type: ignore[arg-type] assert len(caplog.messages) == 1 assert len(caplog.records) == 1 @@ -649,7 +940,7 @@ def test_set_from_bool( ) assert self.standart_options.secrets_list == ["key", "pass", "secret"] - assert self.standart_options.span_filters == {} + assert self.standart_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert not self.standart_options.extra_http_headers @@ -666,7 +957,9 @@ def test_serverless_options(self) -> None: assert self.serverless_options.log_level == logging.WARN assert not self.serverless_options.extra_http_headers assert not self.serverless_options.allow_exit_as_root - assert not self.serverless_options.span_filters + assert self.serverless_options.span_filters == { + "exclude": INTERNAL_SPAN_FILTERS + } assert self.serverless_options.secrets_matcher == "contains-ignore-case" assert self.serverless_options.secrets_list == ["key", "pass", "secret"] assert not self.serverless_options.secrets @@ -811,7 +1104,7 @@ def test_gcr_options(self) -> None: assert self.gcr_options.log_level == logging.WARN assert not self.gcr_options.extra_http_headers assert not self.gcr_options.allow_exit_as_root - assert not self.gcr_options.span_filters + assert self.gcr_options.span_filters == {"exclude": INTERNAL_SPAN_FILTERS} assert self.gcr_options.secrets_matcher == "contains-ignore-case" assert self.gcr_options.secrets_list == ["key", "pass", "secret"] assert not self.gcr_options.secrets diff --git a/tests/util/test_config.py b/tests/util/test_config.py index b77398eb..9ba6d781 100644 --- a/tests/util/test_config.py +++ b/tests/util/test_config.py @@ -4,70 +4,154 @@ from instana.util.config import ( is_truthy, - parse_filtered_endpoints, - parse_filtered_endpoints_dict, - parse_service_pair, + parse_filter_rules, + parse_filter_rules_dict, + parse_filter_rules_string, ) class TestConfig: - def test_parse_service_pair(self) -> None: - test_string = "service1:method1,method2" - response = parse_service_pair(test_string) - assert response == ["service1.method1", "service1.method2"] - - test_string = "service1;service2" - response = parse_filtered_endpoints(test_string) - assert response == ["service1.*", "service2.*"] - - test_string = "service1" - response = parse_filtered_endpoints(test_string) - assert response == ["service1.*"] - - test_string = ";" - response = parse_filtered_endpoints(test_string) - assert response == [] - - test_string = "service1:method1,method2;;;service2:method1;;" - response = parse_filtered_endpoints(test_string) - assert response == [ - "service1.method1", - "service1.method2", - "service2.method1", + def test_parse_filter_rules_string(self) -> None: + """Test parsing of environment variable string format.""" + # Test single rule with strict match + intermediate = { + "exclude": { + "health": { + "name": "health", + "attributes": [], + "suppression": None, + } + }, + "include": {}, + } + result = parse_filter_rules_string( + "http.target;/health;strict", + intermediate, + "exclude", + "health", + ) + assert result["exclude"]["health"]["attributes"] == [ + {"key": "http.target", "values": ["/health"], "match_type": "strict"} ] - test_string = "" - response = parse_filtered_endpoints(test_string) - assert response == [] - - def test_parse_filtered_endpoints_string(self) -> None: - test_string = "service1:method1,method2" - response = parse_service_pair(test_string) - assert response == ["service1.method1", "service1.method2"] - - test_string = "service1;service2" - response = parse_filtered_endpoints(test_string) - assert response == ["service1.*", "service2.*"] + # Test multiple values with comma separation + intermediate = { + "exclude": { + "topics": { + "name": "topics", + "attributes": [], + "suppression": None, + } + }, + "include": {}, + } + result = parse_filter_rules_string( + "kafka.service;topic1,topic2,topic3;strict", + intermediate, + "exclude", + "topics", + ) + assert result["exclude"]["topics"]["attributes"] == [ + { + "key": "kafka.service", + "values": ["topic1", "topic2", "topic3"], + "match_type": "strict", + } + ] - test_string = "service1" - response = parse_filtered_endpoints(test_string) - assert response == ["service1.*"] + # Test multiple rules separated by pipe + intermediate = { + "exclude": { + "multi": { + "name": "multi", + "attributes": [], + "suppression": None, + } + }, + "include": {}, + } + result = parse_filter_rules_string( + "http.target;/health;strict|kafka.service;topic1,topic2;equals", + intermediate, + "exclude", + "multi", + ) + assert len(result["exclude"]["multi"]["attributes"]) == 2 + assert result["exclude"]["multi"]["attributes"][0] == { + "key": "http.target", + "values": ["/health"], + "match_type": "strict", + } + assert result["exclude"]["multi"]["attributes"][1] == { + "key": "kafka.service", + "values": ["topic1", "topic2"], + "match_type": "equals", + } - test_string = ";" - response = parse_filtered_endpoints(test_string) - assert response == [] + # Test default match_type (should be "strict") + intermediate = { + "exclude": { + "default": { + "name": "default", + "attributes": [], + "suppression": None, + } + }, + "include": {}, + } + result = parse_filter_rules_string( + "http.url;/api/v1", + intermediate, + "exclude", + "default", + ) + assert result["exclude"]["default"]["attributes"] == [ + {"key": "http.url", "values": ["/api/v1"], "match_type": "strict"} + ] - test_string = "service1:method1,method2;;;service2:method1;;" - response = parse_filtered_endpoints(test_string) - assert response == [ - "service1.method1", - "service1.method2", - "service2.method1", + # Test with whitespace + intermediate = { + "exclude": { + "whitespace": { + "name": "whitespace", + "attributes": [], + "suppression": None, + } + }, + "include": {}, + } + result = parse_filter_rules_string( + " http.target ; /health , /ready ; strict ", + intermediate, + "exclude", + "whitespace", + ) + assert result["exclude"]["whitespace"]["attributes"] == [ + { + "key": "http.target", + "values": ["/health", "/ready"], + "match_type": "strict", + } ] - test_string = "" - response = parse_filtered_endpoints(test_string) - assert response == [] + # Test invalid format (missing values) - should skip + intermediate = { + "exclude": { + "invalid": { + "name": "invalid", + "attributes": [], + "suppression": None, + } + }, + "include": {}, + } + result = parse_filter_rules_string( + "http.target", + intermediate, + "exclude", + "invalid", + ) + assert result["exclude"]["invalid"]["attributes"] == [] def test_parse_filtered_endpoints_dict(self) -> None: test_dict = { @@ -85,7 +169,7 @@ def test_parse_filtered_endpoints_dict(self) -> None: ], "include": [], } - response = parse_filtered_endpoints_dict(test_dict) + response = parse_filter_rules_dict(test_dict) assert response == { "exclude": [ { @@ -104,38 +188,10 @@ def test_parse_filtered_endpoints_dict(self) -> None: } test_dict = {} - response = parse_filtered_endpoints_dict(test_dict) + response = parse_filter_rules_dict(test_dict) assert response == {"exclude": [], "include": []} def test_parse_filtered_endpoints(self) -> None: - test_pair = "service1:method1,method2" - response = parse_filtered_endpoints(test_pair) - assert response == ["service1.method1", "service1.method2"] - - test_pair = "service1;service2" - response = parse_filtered_endpoints(test_pair) - assert response == ["service1.*", "service2.*"] - - test_pair = "service1" - response = parse_filtered_endpoints(test_pair) - assert response == ["service1.*"] - - test_pair = ";" - response = parse_filtered_endpoints(test_pair) - assert response == [] - - test_pair = "service1:method1,method2;;;service2:method1;;" - response = parse_filtered_endpoints(test_pair) - assert response == [ - "service1.method1", - "service1.method2", - "service2.method1", - ] - - test_pair = "" - response = parse_filtered_endpoints(test_pair) - assert response == [] - test_dict = { "exclude": [ { @@ -151,7 +207,7 @@ def test_parse_filtered_endpoints(self) -> None: ], "include": [], } - response = parse_filtered_endpoints(test_dict) + response = parse_filter_rules(test_dict) assert response == { "exclude": [ { @@ -170,7 +226,7 @@ def test_parse_filtered_endpoints(self) -> None: } test_dict = {} - response = parse_filtered_endpoints(test_dict) + response = parse_filter_rules(test_dict) assert response == {"exclude": [], "include": []} @pytest.mark.parametrize( diff --git a/tests/util/test_config_reader.py b/tests/util/test_config_reader.py index 365730d3..71bead0d 100644 --- a/tests/util/test_config_reader.py +++ b/tests/util/test_config_reader.py @@ -9,7 +9,7 @@ from instana.util.config import ( get_disable_trace_configurations_from_yaml, - parse_filtered_endpoints_from_yaml, + parse_filter_rules_yaml, ) from instana.util.config_reader import ConfigReader @@ -75,9 +75,7 @@ def test_config_reader_yaml_error( def test_load_configuration_with_tracing(self, caplog: "LogCaptureFixture") -> None: caplog.set_level(logging.DEBUG, logger="instana") - span_filters = parse_filtered_endpoints_from_yaml( - "tests/util/test_configuration-1.yaml" - ) + span_filters = parse_filter_rules_yaml("tests/util/test_configuration-1.yaml") # test with tracing assert span_filters == { "exclude": [ @@ -173,9 +171,7 @@ def test_load_configuration_with_tracing(self, caplog: "LogCaptureFixture") -> N def test_load_configuration_legacy(self, caplog: "LogCaptureFixture") -> None: caplog.set_level(logging.DEBUG, logger="instana") - span_filters = parse_filtered_endpoints_from_yaml( - "tests/util/test_configuration-2.yaml" - ) + span_filters = parse_filter_rules_yaml("tests/util/test_configuration-2.yaml") assert span_filters == { "exclude": [ {