diff --git a/.github/workflows/terraform-mn-vectorization.yml b/.github/workflows/terraform-mn-vectorization.yml
new file mode 100644
index 0000000..bdf1b8c
--- /dev/null
+++ b/.github/workflows/terraform-mn-vectorization.yml
@@ -0,0 +1,74 @@
+name: "Terraform — MN Vectorization"
+
+on:
+  pull_request:
+    paths:
+      - "mn-vectorization/infra/**"
+  push:
+    branches:
+      - main
+    paths:
+      - "mn-vectorization/infra/**"
+
+permissions:
+  contents: read
+
+env:
+  TF_WORKING_DIR: mn-vectorization/infra
+  TF_ENV: dev
+  AWS_REGION: us-east-1
+
+jobs:
+  terraform:
+    name: Terraform
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ${{ env.TF_WORKING_DIR }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: "~1.5"
+
+      # NOTE: Using static keys for POC. OIDC migration deferred until
+      # Nomad provisions IAM OIDC identity provider + role in AWS.
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.MN_VECTORIZATION_AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.MN_VECTORIZATION_AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Terraform Format Check
+        run: terraform fmt -check -recursive
+
+      - name: Terraform Init
+        run: |
+          terraform init \
+            -backend-config="bucket=sf-terraform-state" \
+            -backend-config="key=mn-vectorization/${{ env.TF_ENV }}/terraform.tfstate" \
+            -backend-config="region=${{ env.AWS_REGION }}"
+
+      - name: Terraform Validate
+        run: terraform validate
+
+      - name: Terraform Plan
+        if: github.event_name == 'pull_request'
+        run: |
+          set +e
+          terraform plan -var-file=environments/${{ env.TF_ENV }}.tfvars -no-color -input=false -detailed-exitcode
+          ec=$?
+          if [ "$ec" -eq 1 ]; then
+            echo "::error::Terraform plan failed"
+            exit 1
+          fi
+          exit 0
+
+      - name: Terraform Apply
+        if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+        run: terraform apply -var-file=environments/${{ env.TF_ENV }}.tfvars -auto-approve -input=false
diff --git a/mn-vectorization/infra/.gitignore b/mn-vectorization/infra/.gitignore
new file mode 100644
index 0000000..7ae4b62
--- /dev/null
+++ b/mn-vectorization/infra/.gitignore
@@ -0,0 +1,4 @@
+.terraform/
+*.tfstate
+*.tfstate.backup
+*.tfplan
diff --git a/mn-vectorization/infra/backend.tf b/mn-vectorization/infra/backend.tf
new file mode 100644
index 0000000..12c0dbe
--- /dev/null
+++ b/mn-vectorization/infra/backend.tf
@@ -0,0 +1,3 @@
+terraform {
+  backend "s3" {}
+}
diff --git a/mn-vectorization/infra/cloudwatch.tf b/mn-vectorization/infra/cloudwatch.tf
new file mode 100644
index 0000000..927da17
--- /dev/null
+++ b/mn-vectorization/infra/cloudwatch.tf
@@ -0,0 +1,75 @@
+# -----------------------------------------------------
+# CloudWatch log groups
+# -----------------------------------------------------
+
+resource "aws_cloudwatch_log_group" "main" {
+  for_each          = local.log_groups
+  name              = each.value
+  retention_in_days = var.log_retention_days
+  tags              = { Name = each.value }
+}
+
+# -----------------------------------------------------
+# CloudWatch alarms
+# -----------------------------------------------------
+
+# Alarm 1: Indexing failures (custom metric from Temporal worker)
+resource "aws_cloudwatch_metric_alarm" "indexing_failures" {
+  count               = var.is_alarm_enabled ? 1 : 0
+  alarm_name          = "${local.name_prefix}_indexing_failures_alarm"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = 1
+  metric_name         = "IndexingFailures"
+  namespace           = "MNVectorization/${var.environment}"
+  period              = 300
+  statistic           = "Sum"
+  threshold           = 0
+  alarm_description   = "Indexing pipeline failure detected"
+  treat_missing_data  = "notBreaching"
+
+  alarm_actions = var.alarm_sns_topic_arn != "" ? [var.alarm_sns_topic_arn] : []
+
+  tags = { Name = "${local.name_prefix}_indexing_failures_alarm" }
+}
+
+# Alarm 2: Query latency p99 (custom metric from MCP server)
+resource "aws_cloudwatch_metric_alarm" "query_latency_p99" {
+  count               = var.is_alarm_enabled ? 1 : 0
+  alarm_name          = "${local.name_prefix}_query_latency_p99_alarm"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = 2
+  metric_name         = "QueryLatencyP99"
+  namespace           = "MNVectorization/${var.environment}"
+  period              = 300
+  statistic           = "Maximum"
+  threshold           = 30000
+  alarm_description   = "Query p99 latency exceeds 30s"
+  treat_missing_data  = "notBreaching"
+
+  alarm_actions = var.alarm_sns_topic_arn != "" ? [var.alarm_sns_topic_arn] : []
+
+  tags = { Name = "${local.name_prefix}_query_latency_p99_alarm" }
+}
+
+# Alarm 3: DynamoDB throttling (per table)
+resource "aws_cloudwatch_metric_alarm" "dynamodb_throttling" {
+  for_each            = var.is_alarm_enabled ? local.dynamodb_tables : {}
+  alarm_name          = "${local.name_prefix}_${each.key}_throttling_alarm"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = 1
+  metric_name         = "ThrottledRequests"
+  namespace           = "AWS/DynamoDB"
+  period              = 60
+  statistic           = "Sum"
+  threshold           = 0
+  alarm_description   = "DynamoDB throttling on ${each.key} table"
+  treat_missing_data  = "notBreaching"
+
+  dimensions = {
+    TableName = aws_dynamodb_table.main[each.key].name
+  }
+
+  alarm_actions = var.alarm_sns_topic_arn != "" ? [var.alarm_sns_topic_arn] : []
+
+  tags = { Name = "${local.name_prefix}_${each.key}_throttling_alarm" }
+}
diff --git a/mn-vectorization/infra/data.tf b/mn-vectorization/infra/data.tf
new file mode 100644
index 0000000..2bdab01
--- /dev/null
+++ b/mn-vectorization/infra/data.tf
@@ -0,0 +1,11 @@
+data "aws_vpc" "existing" {
+  id = var.vpc_id
+}
+
+data "aws_instance" "existing" {
+  instance_id = var.ec2_instance_id
+}
+
+data "aws_caller_identity" "current" {}
+
+data "aws_region" "current" {}
diff --git a/mn-vectorization/infra/dynamodb.tf b/mn-vectorization/infra/dynamodb.tf
new file mode 100644
index 0000000..c673074
--- /dev/null
+++ b/mn-vectorization/infra/dynamodb.tf
@@ -0,0 +1,30 @@
+# -----------------------------------------------------
+# DynamoDB tables for task state and user ACL
+# Created via for_each over local.dynamodb_tables
+# -----------------------------------------------------
+
+resource "aws_dynamodb_table" "main" {
+  for_each     = local.dynamodb_tables
+  name         = "${local.name_prefix}_${each.key}_ddb"
+  billing_mode = "PAY_PER_REQUEST"
+  hash_key     = each.value.hash_key
+
+  attribute {
+    name = each.value.hash_key
+    type = "S"
+  }
+
+  dynamic "ttl" {
+    for_each = each.value.ttl_attr != null ? [each.value.ttl_attr] : []
+    content {
+      attribute_name = ttl.value
+      enabled        = true
+    }
+  }
+
+  point_in_time_recovery {
+    enabled = var.environment == "prod"
+  }
+
+  tags = { Name = "${local.name_prefix}_${each.key}_ddb" }
+}
diff --git a/mn-vectorization/infra/environments/dev.tfvars b/mn-vectorization/infra/environments/dev.tfvars
new file mode 100644
index 0000000..466d839
--- /dev/null
+++ b/mn-vectorization/infra/environments/dev.tfvars
@@ -0,0 +1,21 @@
+environment = "dev"
+aws_region  = "us-east-1"
+billing_tag = "mn-vectorization"
+
+# Existing infrastructure — replace with actual IDs
+vpc_id          = "vpc-385f9a56"
+ec2_instance_id = "i-XXXXXXXXXXXXXXXXX"
+
+# CloudWatch
+log_retention_days  = 14
+alarm_sns_topic_arn = ""
+is_alarm_enabled    = false
+
+# MCP Server
+mcp_server_port = 3000
+
+# Encryption
+is_kms_enabled = false
+
+# S3 lifecycle
+embeddings_expiry_days = 0
diff --git a/mn-vectorization/infra/environments/prod.tfvars b/mn-vectorization/infra/environments/prod.tfvars
new file mode 100644
index 0000000..e20949e
--- /dev/null
+++ b/mn-vectorization/infra/environments/prod.tfvars
@@ -0,0 +1,22 @@
+environment = "prod"
+aws_region  = "us-east-1"
+billing_tag = "mn-vectorization"
+
+# Existing infrastructure — replace with actual IDs
+vpc_id          = "vpc-XXXXXXXXXXXXXXXXX"
+ec2_instance_id = "i-XXXXXXXXXXXXXXXXX"
+
+# CloudWatch
+log_retention_days  = 90
+alarm_sns_topic_arn = "arn:aws:sns:us-east-1:891612588877:mn-vectorization-alerts"
+is_alarm_enabled    = true
+
+# MCP Server
+mcp_server_port = 3000
+
+# Encryption
+is_kms_enabled = false
+# kms_key_arn = "arn:aws:kms:us-east-1:891612588877:key/XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
+
+# S3 lifecycle
+embeddings_expiry_days = 0
diff --git a/mn-vectorization/infra/environments/staging.tfvars b/mn-vectorization/infra/environments/staging.tfvars
new file mode 100644
index 0000000..614d2bb
--- /dev/null
+++ b/mn-vectorization/infra/environments/staging.tfvars
@@ -0,0 +1,21 @@
+environment = "staging"
+aws_region  = "us-east-1"
+billing_tag = "mn-vectorization"
+
+# Existing infrastructure — replace with actual IDs
+vpc_id          = "vpc-XXXXXXXXXXXXXXXXX"
+ec2_instance_id = "i-XXXXXXXXXXXXXXXXX"
+
+# CloudWatch
+log_retention_days  = 30
+alarm_sns_topic_arn = ""
+is_alarm_enabled    = true
+
+# MCP Server
+mcp_server_port = 3000
+
+# Encryption
+is_kms_enabled = false
+
+# S3 lifecycle
+embeddings_expiry_days = 0
diff --git a/mn-vectorization/infra/iam.tf b/mn-vectorization/infra/iam.tf
new file mode 100644
index 0000000..acff2ec
--- /dev/null
+++ b/mn-vectorization/infra/iam.tf
@@ -0,0 +1,181 @@
+# -----------------------------------------------------
+# IAM role for the EC2 worker (Temporal + MCP Server)
+# -----------------------------------------------------
+
+resource "aws_iam_role" "worker" {
+  name = "${local.name_prefix}_worker_role"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect    = "Allow"
+      Principal = { Service = "ec2.amazonaws.com" }
+      Action    = "sts:AssumeRole"
+    }]
+  })
+
+  tags = { Name = "${local.name_prefix}_worker_role" }
+}
+
+resource "aws_iam_instance_profile" "worker" {
+  name = "${local.name_prefix}_worker_profile"
+  role = aws_iam_role.worker.name
+}
+
+# --- Policy 1: S3 access ---
+
+resource "aws_iam_role_policy" "s3_access" {
+  name = "s3-access"
+  role = aws_iam_role.worker.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Sid      = "BucketObjects"
+        Effect   = "Allow"
+        Action   = ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"]
+        Resource = "${aws_s3_bucket.artifacts.arn}/*"
+      },
+      {
+        Sid      = "BucketList"
+        Effect   = "Allow"
+        Action   = ["s3:ListBucket"]
+        Resource = aws_s3_bucket.artifacts.arn
+      }
+    ]
+  })
+}
+
+# --- Policy 2: DynamoDB access ---
+
+resource "aws_iam_role_policy" "dynamodb_access" {
+  name = "dynamodb-access"
+  role = aws_iam_role.worker.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = [
+        "dynamodb:GetItem",
+        "dynamodb:PutItem",
+        "dynamodb:UpdateItem",
+        "dynamodb:DeleteItem",
+        "dynamodb:Query",
+        "dynamodb:Scan"
+      ]
+      Resource = [
+        for t in aws_dynamodb_table.main : t.arn
+      ]
+    },
+    {
+      Effect = "Allow"
+      Action = [
+        "dynamodb:Query",
+        "dynamodb:Scan"
+      ]
+      Resource = [
+        for t in aws_dynamodb_table.main : "${t.arn}/index/*"
+      ]
+    }]
+  })
+}
+
+# --- Policy 3: Bedrock access ---
+
+resource "aws_iam_role_policy" "bedrock_access" {
+  name = "bedrock-access"
+  role = aws_iam_role.worker.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = [
+        "bedrock:InvokeModel",
+        "bedrock:InvokeModelWithResponseStream"
+      ]
+      Resource = [
+        "arn:aws:bedrock:${data.aws_region.current.name}::foundation-model/anthropic.*",
+        "arn:aws:bedrock:${data.aws_region.current.name}::foundation-model/cohere.*"
+      ]
+    },
+    {
+      Effect   = "Allow"
+      Action   = ["bedrock:ApplyGuardrail"]
+      Resource = "arn:aws:bedrock:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:guardrail/*"
+    }]
+  })
+}
+
+# --- Policy 4: CloudWatch Logs ---
+
+resource "aws_iam_role_policy" "cloudwatch_logs" {
+  name = "cloudwatch-logs"
+  role = aws_iam_role.worker.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = [
+        "logs:CreateLogStream",
+        "logs:PutLogEvents",
+        "logs:DescribeLogStreams"
+      ]
+      Resource = [
+        for lg in aws_cloudwatch_log_group.main : "${lg.arn}:*"
+      ]
+    }]
+  })
+}
+
+# --- Policy 5: Secrets Manager ---
+
+resource "aws_iam_role_policy" "secrets_access" {
+  name = "secrets-access"
+  role = aws_iam_role.worker.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = [
+        "secretsmanager:GetSecretValue",
+        "secretsmanager:DescribeSecret"
+      ]
+      Resource = [
+        for s in aws_secretsmanager_secret.main : s.arn
+      ]
+    }]
+  })
+}
+
+# --- Policy 6: KMS (conditional) ---
+
+resource "aws_iam_role_policy" "kms_access" {
+  count = var.is_kms_enabled ? 1 : 0
+  name  = "kms-access"
+  role  = aws_iam_role.worker.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = [
+        "kms:Decrypt",
+        "kms:GenerateDataKey",
+        "kms:DescribeKey"
+      ]
+      Resource = [var.kms_key_arn]
+    }]
+  })
+
+  lifecycle {
+    precondition {
+      condition     = var.kms_key_arn != ""
+      error_message = "kms_key_arn must be set when is_kms_enabled = true"
+    }
+  }
+}
diff --git a/mn-vectorization/infra/locals.tf b/mn-vectorization/infra/locals.tf
new file mode 100644
index 0000000..54ea8a3
--- /dev/null
+++ b/mn-vectorization/infra/locals.tf
@@ -0,0 +1,44 @@
+locals {
+  name_prefix    = "${var.project_name}_${var.environment}"
+  name_prefix_s3 = "${replace(var.project_name, "_", "-")}-${var.environment}"
+
+  # DynamoDB tables — iterated via for_each
+  dynamodb_tables = {
+    tasks = {
+      hash_key = "task_id"
+      ttl_attr = "expires_at"
+    }
+    users = {
+      hash_key = "email"
+      ttl_attr = null
+    }
+  }
+
+  # Secrets Manager entries — iterated via for_each
+  secrets = {
+    anthropic_api_key = {
+      name  = "${var.project_name}/${var.environment}/anthropic-api-key"
+      value = var.anthropic_api_key
+    }
+    cohere_api_key = {
+      name  = "${var.project_name}/${var.environment}/cohere-api-key"
+      value = var.cohere_api_key
+    }
+    qdrant_api_key = {
+      name  = "${var.project_name}/${var.environment}/qdrant-api-key"
+      value = var.qdrant_api_key
+    }
+    qdrant_url = {
+      name  = "${var.project_name}/${var.environment}/qdrant-url"
+      value = var.qdrant_url
+    }
+  }
+
+  # CloudWatch log groups — iterated via for_each
+  log_groups = {
+    ingestion       = "/aws/${var.project_name}/${var.environment}/ingestion"
+    retrieval       = "/aws/${var.project_name}/${var.environment}/retrieval"
+    mcp_server      = "/aws/${var.project_name}/${var.environment}/mcp-server"
+    temporal_worker = "/aws/${var.project_name}/${var.environment}/temporal-worker"
+  }
+}
diff --git a/mn-vectorization/infra/outputs.tf b/mn-vectorization/infra/outputs.tf
new file mode 100644
index 0000000..4025736
--- /dev/null
+++ b/mn-vectorization/infra/outputs.tf
@@ -0,0 +1,82 @@
+# -----------------------------------------------------
+# S3
+# -----------------------------------------------------
+
+output "s3_bucket_arn" {
+  description = "ARN of the artifacts S3 bucket"
+  value       = aws_s3_bucket.artifacts.arn
+}
+
+output "s3_bucket_name" {
+  description = "Name of the artifacts S3 bucket"
+  value       = aws_s3_bucket.artifacts.id
+}
+
+# -----------------------------------------------------
+# DynamoDB
+# -----------------------------------------------------
+
+output "dynamodb_table_arns" {
+  description = "ARNs of DynamoDB tables keyed by table name"
+  value       = { for k, t in aws_dynamodb_table.main : k => t.arn }
+}
+
+output "dynamodb_table_names" {
+  description = "Names of DynamoDB tables keyed by table name"
+  value       = { for k, t in aws_dynamodb_table.main : k => t.name }
+}
+
+# -----------------------------------------------------
+# IAM
+# -----------------------------------------------------
+
+output "worker_role_arn" {
+  description = "ARN of the EC2 worker IAM role"
+  value       = aws_iam_role.worker.arn
+}
+
+output "worker_instance_profile_name" {
+  description = "Name of the IAM instance profile for the EC2 worker"
+  value       = aws_iam_instance_profile.worker.name
+}
+
+# -----------------------------------------------------
+# Secrets Manager
+# -----------------------------------------------------
+
+output "secret_arns" {
+  description = "ARNs of Secrets Manager secrets keyed by secret key"
+  value       = { for k, s in aws_secretsmanager_secret.main : k => s.arn }
+}
+
+# -----------------------------------------------------
+# CloudWatch
+# -----------------------------------------------------
+
+output "log_group_names" {
+  description = "CloudWatch log group names keyed by component"
+  value       = { for k, lg in aws_cloudwatch_log_group.main : k => lg.name }
+}
+
+# -----------------------------------------------------
+# Security Group
+# -----------------------------------------------------
+
+output "mcp_security_group_id" {
+  description = "ID of the MCP server security group"
+  value       = aws_security_group.mcp.id
+}
+
+# -----------------------------------------------------
+# Existing infrastructure
+# -----------------------------------------------------
+
+output "vpc_cidr_block" {
+  description = "CIDR block of the existing VPC"
+  value       = data.aws_vpc.existing.cidr_block
+}
+
+output "ec2_instance_private_ip" {
+  description = "Private IP of the existing EC2 instance"
+  value       = data.aws_instance.existing.private_ip
+}
diff --git a/mn-vectorization/infra/provider.tf b/mn-vectorization/infra/provider.tf
new file mode 100644
index 0000000..6754b38
--- /dev/null
+++ b/mn-vectorization/infra/provider.tf
@@ -0,0 +1,22 @@
+terraform {
+  required_version = ">= 1.5, < 2.0"
+
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "aws" {
+  region = var.aws_region
+
+  default_tags {
+    tags = {
+      Billing     = var.billing_tag
+      Environment = var.environment
+      Project     = var.project_name
+    }
+  }
+}
diff --git a/mn-vectorization/infra/s3.tf b/mn-vectorization/infra/s3.tf
new file mode 100644
index 0000000..206ab7a
--- /dev/null
+++ b/mn-vectorization/infra/s3.tf
@@ -0,0 +1,64 @@
+# -----------------------------------------------------
+# S3 bucket for meeting transcript artifacts
+# Prefixes (app-managed): raw/, translated/, chunks_l0/,
+# chunks_l1/, summaries/, embeddings/
+# -----------------------------------------------------
+
+resource "aws_s3_bucket" "artifacts" {
+  bucket = "${local.name_prefix_s3}-bk"
+  tags   = { Name = "${local.name_prefix_s3}-bk" }
+}
+
+resource "aws_s3_bucket_versioning" "artifacts" {
+  bucket = aws_s3_bucket.artifacts.id
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "artifacts" {
+  bucket = aws_s3_bucket.artifacts.id
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm     = var.is_kms_enabled ? "aws:kms" : "AES256"
+      kms_master_key_id = var.is_kms_enabled ? var.kms_key_arn : null
+    }
+    bucket_key_enabled = var.is_kms_enabled
+  }
+}
+
+resource "aws_s3_bucket_public_access_block" "artifacts" {
+  bucket                  = aws_s3_bucket.artifacts.id
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_bucket_lifecycle_configuration" "artifacts" {
+  bucket = aws_s3_bucket.artifacts.id
+
+  rule {
+    id     = "abort-incomplete-multipart"
+    status = "Enabled"
+    filter {}
+    abort_incomplete_multipart_upload {
+      days_after_initiation = 7
+    }
+  }
+
+  dynamic "rule" {
+    for_each = var.embeddings_expiry_days > 0 ? [1] : []
+    content {
+      id     = "expire-embeddings"
+      status = "Enabled"
+      filter {
+        prefix = "embeddings/"
+      }
+      expiration {
+        days = var.embeddings_expiry_days
+      }
+    }
+  }
+}
diff --git a/mn-vectorization/infra/secrets.tf b/mn-vectorization/infra/secrets.tf
new file mode 100644
index 0000000..bcb01dc
--- /dev/null
+++ b/mn-vectorization/infra/secrets.tf
@@ -0,0 +1,29 @@
+# -----------------------------------------------------
+# Secrets Manager — API keys and connection strings
+# Values are bootstrapped via TF_VAR_ env vars or
+# set out-of-band via aws secretsmanager put-secret-value
+# -----------------------------------------------------
+#
+# NOTE: Secret values are bootstrapped via TF_VAR_ env vars with placeholder
+# defaults ("CHANGE_ME"). The initial secret_string passes through Terraform
+# state ONCE during creation. After bootstrap, replace values out-of-band:
+#   aws secretsmanager put-secret-value --secret-id --secret-string
+# The lifecycle { ignore_changes = [secret_string] } block ensures Terraform
+# won't overwrite manually-set values on subsequent applies.
+
+resource "aws_secretsmanager_secret" "main" {
+  for_each                = local.secrets
+  name                    = each.value.name
+  recovery_window_in_days = var.environment == "prod" ? 30 : 0
+  tags                    = { Name = each.value.name }
+}
+
+resource "aws_secretsmanager_secret_version" "main" {
+  for_each      = local.secrets
+  secret_id     = aws_secretsmanager_secret.main[each.key].id
+  secret_string = each.value.value
+
+  lifecycle {
+    ignore_changes = [secret_string]
+  }
+}
diff --git a/mn-vectorization/infra/security_group.tf b/mn-vectorization/infra/security_group.tf
new file mode 100644
index 0000000..412a4a1
--- /dev/null
+++ b/mn-vectorization/infra/security_group.tf
@@ -0,0 +1,61 @@
+# -----------------------------------------------------
+# MCP Server security group
+# Created for the existing EC2 instance (manual attachment required)
+# -----------------------------------------------------
+
+resource "aws_security_group" "mcp" {
+  name        = "${local.name_prefix}_mcp_sg"
+  description = "MCP server traffic for MN Vectorization"
+  vpc_id      = data.aws_vpc.existing.id
+  tags        = { Name = "${local.name_prefix}_mcp_sg" }
+}
+
+# --- Ingress rules ---
+
+# VPC -> MCP server on SSE port (covers ALB + internal traffic)
+resource "aws_security_group_rule" "mcp_vpc_ingress" {
+  type              = "ingress"
+  from_port         = var.mcp_server_port
+  to_port           = var.mcp_server_port
+  protocol          = "tcp"
+  cidr_blocks       = [data.aws_vpc.existing.cidr_block]
+  security_group_id = aws_security_group.mcp.id
+  description       = "VPC ingress to MCP server (ALB + internal)"
+}
+
+# --- Egress rules ---
+
+# HTTPS egress (Bedrock, Qdrant, S3, DynamoDB, Secrets Manager, Sentry)
+# NOTE: 0.0.0.0/0 is intentional — external SaaS APIs (Anthropic, Cohere,
+# Qdrant Cloud, Sentry) have rotating IPs; CIDR allowlist is impractical.
+# Production: add VPC endpoints for AWS services to reduce egress scope.
+resource "aws_security_group_rule" "mcp_https_egress" {
+  type              = "egress"
+  from_port         = 443
+  to_port           = 443
+  protocol          = "tcp"
+  cidr_blocks       = ["0.0.0.0/0"] #tfsec:ignore:aws-vpc-no-public-egress-sgr
+  security_group_id = aws_security_group.mcp.id
+  description       = "HTTPS egress for AWS services and external APIs"
+}
+
+# DNS egress
+resource "aws_security_group_rule" "mcp_dns_egress_tcp" {
+  type              = "egress"
+  from_port         = 53
+  to_port           = 53
+  protocol          = "tcp"
+  cidr_blocks       = [data.aws_vpc.existing.cidr_block]
+  security_group_id = aws_security_group.mcp.id
+  description       = "DNS TCP egress"
+}
+
+resource "aws_security_group_rule" "mcp_dns_egress_udp" {
+  type              = "egress"
+  from_port         = 53
+  to_port           = 53
+  protocol          = "udp"
+  cidr_blocks       = [data.aws_vpc.existing.cidr_block]
+  security_group_id = aws_security_group.mcp.id
+  description       = "DNS UDP egress"
+}
diff --git a/mn-vectorization/infra/variables.tf b/mn-vectorization/infra/variables.tf
new file mode 100644
index 0000000..34822e2
--- /dev/null
+++ b/mn-vectorization/infra/variables.tf
@@ -0,0 +1,142 @@
+# -----------------------------------------------------
+# Project
+# -----------------------------------------------------
+
+variable "project_name" {
+  type        = string
+  default     = "mn_vectorization"
+  description = "Project name used in resource naming"
+}
+
+variable "environment" {
+  type        = string
+  description = "Deployment environment"
+  validation {
+    condition     = contains(["dev", "staging", "prod"], var.environment)
+    error_message = "Must be dev, staging, or prod."
+  }
+}
+
+variable "aws_region" {
+  type        = string
+  default     = "us-east-1"
+  description = "AWS region for all resources"
+}
+
+variable "billing_tag" {
+  type        = string
+  default     = "mn-vectorization"
+  description = "Billing tag applied to all resources via default_tags"
+}
+
+# -----------------------------------------------------
+# Existing infrastructure references
+# -----------------------------------------------------
+
+variable "vpc_id" {
+  type        = string
+  description = "ID of the existing VPC"
+}
+
+variable "ec2_instance_id" {
+  type        = string
+  description = "ID of the existing EC2 instance running Temporal + MCP Server"
+}
+
+# -----------------------------------------------------
+# Secrets (sensitive)
+# -----------------------------------------------------
+
+variable "anthropic_api_key" {
+  type        = string
+  sensitive   = true
+  default     = "CHANGE_ME"
+  description = "Anthropic API key for Bedrock access"
+}
+
+variable "cohere_api_key" {
+  type        = string
+  sensitive   = true
+  default     = "CHANGE_ME"
+  description = "Cohere API key for embeddings and reranking"
+}
+
+variable "qdrant_api_key" {
+  type        = string
+  sensitive   = true
+  default     = "CHANGE_ME"
+  description = "Qdrant Cloud API key"
+}
+
+variable "qdrant_url" {
+  type        = string
+  sensitive   = true
+  default     = "CHANGE_ME"
+  description = "Qdrant Cloud cluster URL"
+}
+
+# -----------------------------------------------------
+# CloudWatch
+# -----------------------------------------------------
+
+variable "log_retention_days" {
+  type        = number
+  default     = 30
+  description = "CloudWatch log group retention in days"
+  validation {
+    condition     = contains([1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653], var.log_retention_days)
+    error_message = "Must be a valid CloudWatch retention value."
+  }
+}
+
+variable "alarm_sns_topic_arn" {
+  type        = string
+  default     = ""
+  description = "SNS topic ARN for CloudWatch alarm notifications (empty = no notifications)"
+}
+
+# -----------------------------------------------------
+# MCP Server
+# -----------------------------------------------------
+
+variable "mcp_server_port" {
+  type        = number
+  default     = 3000
+  description = "Port the MCP server listens on (SSE transport)"
+}
+
+# -----------------------------------------------------
+# Encryption
+# -----------------------------------------------------
+
+variable "is_kms_enabled" {
+  type        = bool
+  default     = false
+  description = "Use SSE-KMS instead of SSE-S3 for S3 encryption"
+}
+
+variable "kms_key_arn" {
+  type        = string
+  default     = ""
+  description = "KMS key ARN for S3 encryption (required if is_kms_enabled = true)"
+  validation {
+    condition     = var.kms_key_arn == "" || can(regex("^arn:aws:kms:", var.kms_key_arn))
+    error_message = "Must be a valid KMS key ARN or empty string."
+  }
+}
+
+# -----------------------------------------------------
+# Optional features
+# -----------------------------------------------------
+
+variable "is_alarm_enabled" {
+  type        = bool
+  default     = true
+  description = "Create CloudWatch alarms"
+}
+
+variable "embeddings_expiry_days" {
+  type        = number
+  default     = 0
+  description = "Days before S3 embeddings/ prefix objects expire (0 = no expiry)"
+}