diff --git a/README.md b/README.md index b4be3bb..1dbe9b5 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,66 @@ That's it. You're now tracing your code with OpenLLMetry! Now, you need to decide where to export the traces to. +## ⚙️ Configuration + +### Service Name + +You can customize your service name by providing a `name` parameter: + +```ruby +require "traceloop/sdk" + +# Without name parameter (uses OTEL_SERVICE_NAME as-is) +traceloop = Traceloop::SDK::Traceloop.new +# Service name: value of OTEL_SERVICE_NAME, or "unknown_service:ruby" + +# With name parameter (combines name with OTEL_ENVIRONMENT) +traceloop = Traceloop::SDK::Traceloop.new(name: "worker") +# Service name: "worker-production" (if OTEL_ENVIRONMENT="production") +# Service name: "worker-unknown" (if OTEL_ENVIRONMENT not set) +``` + +### Multiple Service Instances + +You can create multiple Traceloop instances with different service names in the same application: + +```ruby +traceloop_api = Traceloop::SDK::Traceloop.new(name: "api") +traceloop_worker = Traceloop::SDK::Traceloop.new(name: "worker") +traceloop_scheduler = Traceloop::SDK::Traceloop.new(name: "scheduler") + +# Each instance traces with its own service name (assuming OTEL_ENVIRONMENT="production"): +# - "api-production" +# - "worker-production" +# - "scheduler-production" +``` + +### Environment Variables + +Control your service naming using environment variables. `OTEL_SERVICE_NAME` is the standard OpenTelemetry variable; `OTEL_ENVIRONMENT` is read only by this SDK and is not part of the OpenTelemetry specification: + +```bash +# Used when no name parameter is provided +export OTEL_SERVICE_NAME="my-app" + +# Combined with name parameter: "worker-production" +export OTEL_ENVIRONMENT="production" +``` + +Defaults: +- `OTEL_SERVICE_NAME` defaults to `"unknown_service:ruby"` +- `OTEL_ENVIRONMENT` defaults to `"unknown"` + +### Cleanup + +When shutting down your application, ensure spans are properly flushed: + +```ruby +traceloop = Traceloop::SDK::Traceloop.new +# ... use traceloop ... 
+traceloop.shutdown # Flush remaining spans before exit +``` + ## ⏫ Supported (and tested) destinations - [x] [Traceloop](https://www.traceloop.com/docs/openllmetry/integrations/traceloop) diff --git a/sample-app/Gemfile.lock b/sample-app/Gemfile.lock index ad4418e..8be0a65 100644 --- a/sample-app/Gemfile.lock +++ b/sample-app/Gemfile.lock @@ -30,7 +30,7 @@ GEM faraday-typhoeus (1.1.0) faraday (~> 2.0) typhoeus (~> 1.4) - ffi (1.17.0-arm64-darwin) + ffi (1.17.0) gemini-ai (4.2.0) event_stream_parser (~> 1.0) faraday (~> 2.10) @@ -39,7 +39,7 @@ GEM typhoeus (~> 1.4, >= 1.4.1) google-cloud-env (2.2.1) faraday (>= 1.0, < 3.a) - google-protobuf (3.25.5-arm64-darwin) + google-protobuf (3.25.5) googleapis-common-protos-types (1.16.0) google-protobuf (>= 3.18, < 5.a) googleauth (1.11.2) @@ -100,6 +100,7 @@ GEM PLATFORMS arm64-darwin-23 + x86_64-linux DEPENDENCIES aws-sdk-bedrockruntime (~> 1.14) diff --git a/sample-app/bedrock.rb b/sample-app/bedrock.rb index 08d95eb..fa8505a 100644 --- a/sample-app/bedrock.rb +++ b/sample-app/bedrock.rb @@ -1,8 +1,15 @@ require 'aws-sdk-bedrockruntime' require "traceloop/sdk" +# Example 1: No name parameter (backward compatible) +# Uses OTEL_SERVICE_NAME as-is, or defaults to "unknown_service:ruby" traceloop = Traceloop::SDK::Traceloop.new +# Example 2: With name parameter +# Creates service name as "#{name}-#{OTEL_ENVIRONMENT}" +# If OTEL_ENVIRONMENT="production", this creates "bedrock-worker-production" +# traceloop = Traceloop::SDK::Traceloop.new(name: "bedrock-worker") + model = "anthropic.claude-3-sonnet-20240229-v1:0" traceloop.llm_call(provider="bedrock", model=model) do |tracer| diff --git a/sample-app/gemini.rb b/sample-app/gemini.rb index 6b411da..f261449 100644 --- a/sample-app/gemini.rb +++ b/sample-app/gemini.rb @@ -9,8 +9,15 @@ options: { model: 'gemini-pro', server_sent_events: true } ) +# Example 1: No name parameter (backward compatible) +# Uses OTEL_SERVICE_NAME as-is, or defaults to "unknown_service:ruby" traceloop = 
Traceloop::SDK::Traceloop.new +# Example 2: With name parameter +# Creates service name as "#{name}-#{OTEL_ENVIRONMENT}" +# If OTEL_ENVIRONMENT="production", this creates "gemini-worker-production" +# traceloop = Traceloop::SDK::Traceloop.new(name: "gemini-worker") + traceloop.llm_call(provider="vertexai", model="gemini-pro") do |tracer| tracer.log_prompt(user_prompt="Tell me a joke about OpenTelemetry") response = client.generate_content( diff --git a/sample-app/openai.rb b/sample-app/openai.rb index 1d3994c..68951e3 100644 --- a/sample-app/openai.rb +++ b/sample-app/openai.rb @@ -7,8 +7,22 @@ client = OpenAI::Client.new +# Example 1: No name parameter (backward compatible) +# Uses OTEL_SERVICE_NAME as-is, or defaults to "unknown_service:ruby" traceloop = Traceloop::SDK::Traceloop.new +# Example 2: With name parameter +# Creates service name as "#{name}-#{OTEL_ENVIRONMENT}" +# If OTEL_ENVIRONMENT="production", this creates "worker-production" +# traceloop_worker = Traceloop::SDK::Traceloop.new(name: "worker") + +# Example 3: Multiple instances with different names +# If OTEL_ENVIRONMENT="production": +# - traceloop_api: "api-production" +# - traceloop_background: "background-production" +# traceloop_api = Traceloop::SDK::Traceloop.new(name: "api") +# traceloop_background = Traceloop::SDK::Traceloop.new(name: "background") + traceloop.workflow("joke_generator") do traceloop.llm_call(provider="openai", model="gpt-3.5-turbo") do |tracer| tracer.log_prompt(user_prompt="Tell me a joke about OpenTelemetry") diff --git a/semantic_conventions_ai/lib/opentelemetry/semantic_conventions.rb b/semantic_conventions_ai/lib/opentelemetry/semantic_conventions.rb index 891eaf9..e0fc86c 100644 --- a/semantic_conventions_ai/lib/opentelemetry/semantic_conventions.rb +++ b/semantic_conventions_ai/lib/opentelemetry/semantic_conventions.rb @@ -30,6 +30,18 @@ module SpanAttributes # Deprecated TRACELOOP_CORRELATION_ID = "traceloop.correlation.id" + + # Gen AI + GEN_AI_REQUEST_MODEL = 
"gen_ai.request.model" + GEN_AI_RESPONSE_MODEL = "gen_ai.response.model" + GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens" + GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens" + GEN_AI_COMPLETIONS = "gen_ai.completion" + GEN_AI_PROMPTS = "gen_ai.prompt" + GEN_AI_SYSTEM = "gen_ai.system" + GEN_AI_PROVIDER = "gen_ai.provider.name" + GEN_AI_CONVERSATION_ID = "gen_ai.conversation.id" + GEN_AI_BEDROCK_GUARDRAILS = "gen_ai.bedrock.guardrail" end module LLMRequestTypeValues diff --git a/traceloop-sdk/lib/traceloop/sdk.rb b/traceloop-sdk/lib/traceloop/sdk.rb index 640be53..613c077 100644 --- a/traceloop-sdk/lib/traceloop/sdk.rb +++ b/traceloop-sdk/lib/traceloop/sdk.rb @@ -5,20 +5,47 @@ module Traceloop module SDK class Traceloop - def initialize - OpenTelemetry::SDK.configure do |c| - c.add_span_processor( - OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new( - OpenTelemetry::Exporter::OTLP::Exporter.new( - endpoint: "#{ENV.fetch("TRACELOOP_BASE_URL", "https://api.traceloop.com")}/v1/traces", - headers: { "Authorization" => "Bearer #{ENV.fetch("TRACELOOP_API_KEY")}" } - ) - ) - ) - puts "Traceloop exporting traces to #{ENV.fetch("TRACELOOP_BASE", "https://api.traceloop.com")}" - end - - @tracer = OpenTelemetry.tracer_provider.tracer("Traceloop") + def initialize(name: nil) + api_key = ENV["TRACELOOP_API_KEY"] + raise "TRACELOOP_API_KEY environment variable is required" if api_key.nil? || api_key.empty? + + # Construct service name + base_name = ENV["OTEL_SERVICE_NAME"] || "unknown_service:ruby" + otel_environment = ENV["OTEL_ENVIRONMENT"] || "unknown" + @service_name = name ? 
"#{name}-#{otel_environment}" : base_name + + # Create resource with service name + resource = OpenTelemetry::SDK::Resources::Resource.create( + OpenTelemetry::SemanticConventions::Resource::SERVICE_NAME => @service_name + ) + + # Create instance-specific tracer provider + @tracer_provider = OpenTelemetry::SDK::Trace::TracerProvider.new( + resource: resource + ) + + # Configure OTLP exporter for this instance + exporter = OpenTelemetry::Exporter::OTLP::Exporter.new( + endpoint: "#{ENV.fetch("TRACELOOP_BASE_URL", "https://api.traceloop.com")}/v1/traces", + headers: { + "Authorization" => "#{ENV.fetch("TRACELOOP_AUTH_SCHEME", "Bearer")} #{ENV.fetch("TRACELOOP_API_KEY")}" + } + ) + + # Add span processor to this instance's provider + @tracer_provider.add_span_processor( + OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(exporter) + ) + + puts "Traceloop exporting traces to #{ENV.fetch("TRACELOOP_BASE_URL", "https://api.traceloop.com")}" + puts "Service name: #{@service_name}" + + # Get tracer from instance-specific provider + @tracer = @tracer_provider.tracer("Traceloop", "0.1.5") + end + + def shutdown + @tracer_provider&.shutdown end class Tracer @@ -41,15 +68,15 @@ def log_messages(messages) def log_prompt(system_prompt="", user_prompt) unless system_prompt.empty? 
@span.add_attributes({ - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_PROMPTS}.0.role" => "system", - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_PROMPTS}.0.content" => system_prompt, - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_PROMPTS}.1.role" => "user", - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_PROMPTS}.1.content" => user_prompt + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROMPTS}.0.role" => "system", + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROMPTS}.0.content" => system_prompt, + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROMPTS}.1.role" => "user", + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROMPTS}.1.content" => user_prompt }) else @span.add_attributes({ - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_PROMPTS}.0.role" => "user", - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_PROMPTS}.0.content" => user_prompt + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROMPTS}.0.role" => "user", + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROMPTS}.0.content" => user_prompt }) end end @@ -57,10 +84,17 @@ def log_prompt(system_prompt="", user_prompt) def log_response(response) if response.respond_to?(:body) log_bedrock_response(response) + # Check for RubyLLM::Message objects + elsif defined?(::RubyLLM::Message) && response.is_a?(::RubyLLM::Message) + log_ruby_llm_message(response) + elsif defined?(::RubyLLM::Tool::Halt) && response.is_a?(::RubyLLM::Tool::Halt) + log_ruby_llm_halt(response) # This is Gemini specific, see - # https://github.com/gbaptista/gemini-ai?tab=readme-ov-file#generate_content - elsif response.has_key?("candidates") + elsif response.respond_to?(:has_key?) 
&& response.has_key?("candidates") log_gemini_response(response) + elsif response.is_a?(String) + log_string_message(response) else log_openai_response(response) end @@ -73,10 +107,38 @@ def log_gemini_response(response) @span.add_attributes({ "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_COMPLETIONS}.0.role" => "assistant", - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_COMPLETIONS}.0.content" => response.dig("candidates", 0, "content", "parts", 0, "text") + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_COMPLETIONS}.0.content" => response.dig( +"candidates", 0, "content", "parts", 0, "text") }) end + def log_ruby_llm_message(response) + @span.add_attributes({ + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_RESPONSE_MODEL => response.model_id, + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_USAGE_OUTPUT_TOKENS => response.output_tokens || 0, + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_USAGE_INPUT_TOKENS => response.input_tokens || 0, + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_COMPLETIONS}.0.role" => response.role.to_s, + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_COMPLETIONS}.0.content" => response.content + }) + end + + def log_ruby_llm_halt(response) + @span.add_attributes({ + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_RESPONSE_MODEL => @model, + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_COMPLETIONS}.0.role" => "tool", + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_COMPLETIONS}.0.content" => response.content + }) + end + + # enables users to log messages with raw text that did not come from an LLM, this allows DT to complete traces + def log_string_message(response) + @span.add_attributes({ + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_RESPONSE_MODEL => @model, + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_COMPLETIONS}.0.role" => 
"assistant", + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_COMPLETIONS}.0.content" => response + }) + end + def log_bedrock_response(response) body = JSON.parse(response.body.read()) @@ -109,25 +171,134 @@ def log_openai_response(response) }) if response.has_key?("usage") @span.add_attributes({ - OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_USAGE_TOTAL_TOKENS => response.dig("usage", "total_tokens"), - OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_USAGE_COMPLETION_TOKENS => response.dig("usage", "completion_tokens"), - OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_USAGE_PROMPT_TOKENS => response.dig("usage", "prompt_tokens"), + OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_USAGE_TOTAL_TOKENS => response.dig("usage", + "total_tokens"), + OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_USAGE_COMPLETION_TOKENS => response.dig( +"usage", "completion_tokens"), + OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_USAGE_PROMPT_TOKENS => response.dig("usage", + "prompt_tokens"), }) end if response.has_key?("choices") @span.add_attributes({ - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_COMPLETIONS}.0.role" => response.dig("choices", 0, "message", "role"), - "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_COMPLETIONS}.0.content" => response.dig("choices", 0, "message", "content") + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_COMPLETIONS}.0.role" => response.dig( +"choices", 0, "message", "role"), + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_COMPLETIONS}.0.content" => response.dig( +"choices", 0, "message", "content") }) end end + + def log_guardrail_response(response) + r = deep_stringify_keys(response || {}) + + activation = guardrail_activation(r) + words_blocked, blocked_words = guardrail_blocked_words(r) + content_filtered, type, confidence = guardrail_content_filtered(r) + + attrs = { + 
"#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROMPTS}.prompt_filter_results" => [type, confidence].to_s, + + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_BEDROCK_GUARDRAILS}.activation" => activation, + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_BEDROCK_GUARDRAILS}.words" => words_blocked, + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_BEDROCK_GUARDRAILS}.content" => content_filtered, + + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_BEDROCK_GUARDRAILS}.action" => r["action"] || "NONE", + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_BEDROCK_GUARDRAILS}.action_reason" => r["action_reason"] || "No action.", + "#{OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_BEDROCK_GUARDRAILS}.words.blocked_words" => blocked_words.to_s, + } + + @span.add_attributes(attrs) + end + + private + + def deep_stringify_keys(obj) + case obj + when Hash + obj.each_with_object({}) do |(k, v), h| + h[k.to_s] = deep_stringify_keys(v) + end + when Array + obj.map { |v| deep_stringify_keys(v) } + else + obj + end + end + + def guardrail_activation(r) + usage = r["usage"] || {} + + units = + (usage["topic_policy_units"] || 0).to_i + + (usage["content_policy_units"] || 0).to_i + + (usage["word_policy_units"] || 0).to_i + + (usage["sensitive_information_policy_units"] || 0).to_i + + units > 0 || (r["assessments"].is_a?(Array) && !r["assessments"].empty?) 
+ end + + def guardrail_blocked_words(r) + assessments = r["assessments"] || [] + + total = 0 + blocked_words = [] + + assessments.each do |a| + word_policy = a["word_policy"] || {} + + # custom_words: [{ "match" => "API", "action" => "BLOCKED", "detected" => true }] + custom_words = word_policy["custom_words"] || [] + custom_words.each do |cw| + if cw["detected"] == true || cw["action"] == "BLOCKED" + total += 1 + blocked_words.append(cw["match"]) + end + end + + managed_lists = word_policy["managed_word_lists"] || [] + managed_lists.each do |entry| + if entry["detected"] == true || entry["action"] == "BLOCKED" + total += 1 + blocked_words.append(entry["match"]) + end + end + end + + [total, blocked_words] + end + + def guardrail_content_filtered(r) + assessments = r["assessments"] || [] + assessments.each do |a| + filters = a.dig("content_policy", "filters") || [] + filters.each do |f| + detected = f["detected"] + action = f["action"] + type = f["type"] + confidence = f["confidence"] + + return [1, type, confidence] if detected == true || (detected && action != "NONE") + end + end + + [0, "", ""] + end end - def llm_call(provider, model) + def llm_call(provider, model, conversation_id: nil) @tracer.in_span("#{provider}.chat") do |span| - span.add_attributes({ - OpenTelemetry::SemanticConventionsAi::SpanAttributes::LLM_REQUEST_MODEL => model, - }) + attributes = { + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_REQUEST_MODEL => model, + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_SYSTEM => provider, + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROVIDER => provider, + } + + if conversation_id + attributes[OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_CONVERSATION_ID] = conversation_id + end + + span.add_attributes(attributes) yield Tracer.new(span, provider, model) end end @@ -171,6 +342,23 @@ def tool(name) yield end end + + def guardrail(name, provider, conversation_id: nil) + 
@tracer.in_span("#{name}.guardrails") do |span| + attributes = { + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_SYSTEM => provider, + OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_PROVIDER => provider, + } + + if conversation_id + attributes[OpenTelemetry::SemanticConventionsAi::SpanAttributes::GEN_AI_CONVERSATION_ID] = + conversation_id + end + + span.add_attributes(attributes) + yield + end + end end end end diff --git a/traceloop-sdk/traceloop-sdk.gemspec b/traceloop-sdk/traceloop-sdk.gemspec index b985c60..9acbe62 100644 --- a/traceloop-sdk/traceloop-sdk.gemspec +++ b/traceloop-sdk/traceloop-sdk.gemspec @@ -17,8 +17,8 @@ Gem::Specification.new do |spec| spec.add_dependency 'opentelemetry-semantic_conventions_ai', '~> 0.0.3' - spec.add_dependency 'opentelemetry-sdk', '~> 1.3.1' - spec.add_dependency 'opentelemetry-exporter-otlp', '~> 0.26.1' + spec.add_dependency 'opentelemetry-exporter-otlp', '~> 0.31.1' + spec.add_dependency 'opentelemetry-sdk', '~> 1.10.0' if spec.respond_to?(:metadata) spec.metadata['source_code_uri'] = 'https://github.com/traceloop/openllmetry-ruby/tree/main/traceloop-sdk'