This guide provides comprehensive documentation for configuring Continuum Router. The router supports multiple configuration methods with a clear priority system to provide maximum flexibility for different deployment scenarios.
Configuration sections:
Server & Backends — Server settings, backend providers, and connection options
Health & Caching — Health checks, request settings, retry, caching, and logging
Security & Admin — API keys, authentication, WebUI, admin endpoints, and ACP
Advanced — Global prompts, model metadata, hot reload, tracing, load balancing, rate limiting
Examples & Migration — Configuration examples, migration guide, and Rust Builder API
Configuration is applied in the following priority order (highest to lowest):
Command-line arguments (highest priority)
Environment variables
Configuration file
Default values (lowest priority)
This allows you to:

- Set base configuration in a file
- Override specific settings via environment variables in containers
- Make temporary adjustments using command-line arguments
```yaml
# Continuum Router Configuration
# This example shows all available configuration options with their default values

# Server configuration
server:
  # bind_address accepts a single string or an array of addresses
  # TCP format: "host:port", Unix socket format: "unix:/path/to/socket"
  bind_address: "0.0.0.0:8080"    # Single address (backward compatible)
  # bind_address:                 # Or multiple addresses:
  #   - "0.0.0.0:8080"            # TCP on all interfaces
  #   - "unix:/var/run/router.sock"  # Unix socket (Unix/Linux/macOS only)
  # socket_mode: 0o660            # Optional: Unix socket file permissions
  workers: 4                      # Number of worker threads (0 = auto-detect)
  connection_pool_size: 100       # Max idle connections per backend

# Model metadata configuration (optional)
model_metadata_file: "model-metadata.yaml"  # Path to external model metadata file

# Backend configuration
backends:
  # Native OpenAI API with built-in configuration
  - name: "openai"
    type: openai                  # Use native OpenAI backend
    api_key: "${CONTINUUM_OPENAI_API_KEY}"  # Loaded from environment
    org_id: "${CONTINUUM_OPENAI_ORG_ID}"    # Optional organization ID
    weight: 3
    models:                       # Specify which models to use
      - gpt-4o
      - gpt-4o-mini
      - o3-mini
      - text-embedding-3-large
    retry_override:               # Backend-specific retry settings (optional)
      max_attempts: 5
      base_delay: "200ms"
      max_delay: "30s"
      exponential_backoff: true
      jitter: true

  # Generic OpenAI-compatible backend with custom metadata
  - name: "openai-compatible"
    url: "https://custom-llm.example.com"
    weight: 1
    models:
      - "gpt-4"
      - "gpt-3.5-turbo"
    model_configs:                # Enhanced model configuration with metadata
      - id: "gpt-4"
        aliases:                  # Alternative IDs that share this metadata (optional)
          - "gpt-4-0125-preview"
          - "gpt-4-turbo-preview"
        metadata:
          display_name: "GPT-4"
          summary: "Most capable GPT-4 model for complex tasks"
          capabilities: ["text", "image", "function_calling"]
          knowledge_cutoff: "2024-04"
          pricing:
            input_tokens: 0.03
            output_tokens: 0.06
          limits:
            context_window: 128000
            max_output: 4096

  # Ollama local server with automatic URL detection
  - name: "local-ollama"
    type: ollama                  # Defaults to http://localhost:11434
    weight: 2
    models:
      - "llama2"
      - "mistral"
      - "codellama"

  # vLLM server
  - name: "vllm-server"
    type: vllm
    url: "http://localhost:8000"
    weight: 1
    # Models will be discovered automatically if not specified
    # Models with namespace prefixes (e.g., "custom/gpt-4") will automatically
    # match metadata for base names (e.g., "gpt-4")

  # Google Gemini API (native backend)
  - name: "gemini"
    type: gemini                  # Use native Gemini backend
    api_key: "${CONTINUUM_GEMINI_API_KEY}"  # Loaded from environment
    weight: 2
    models:
      - gemini-3.1-pro-preview
      - gemini-3-flash-preview
      - gemini-2.5-pro
      - gemini-2.5-flash

# Health monitoring configuration
health_checks:
  enabled: true                   # Enable/disable health checks
  interval: "30s"                 # How often to check backend health
  timeout: "10s"                  # Timeout for health check requests
  unhealthy_threshold: 3          # Failures before marking unhealthy
  healthy_threshold: 2            # Successes before marking healthy
  endpoint: "/v1/models"          # Endpoint used for health checks

# Request handling and timeout configuration
timeouts:
  connection: "10s"               # TCP connection establishment timeout
  request:
    standard:                     # Non-streaming requests
      first_byte: "30s"           # Time to receive first byte
      total: "180s"               # Total request timeout (3 minutes)
    streaming:                    # Streaming (SSE) requests
      first_byte: "60s"           # Time to first SSE chunk
      chunk_interval: "30s"       # Max time between chunks
      total: "600s"               # Total streaming timeout (10 minutes)
    image_generation:             # Image generation requests (DALL-E, etc.)
      first_byte: "60s"           # Time to receive first byte
      total: "180s"               # Total timeout (3 minutes default)
    model_overrides:              # Model-specific timeout overrides
      gpt-5-latest:
        streaming:
          total: "1200s"          # 20 minutes for GPT-5
      gpt-4o:
        streaming:
          total: "900s"           # 15 minutes for GPT-4o
  health_check:
    timeout: "5s"                 # Health check timeout
    interval: "30s"               # Health check interval

request:
  max_retries: 3                  # Maximum retry attempts for requests
  retry_delay: "1s"               # Initial delay between retries

# Global retry and resilience configuration
retry:
  max_attempts: 3                 # Maximum retry attempts
  base_delay: "100ms"             # Base delay between retries
  max_delay: "30s"                # Maximum delay between retries
  exponential_backoff: true       # Use exponential backoff
  jitter: true                    # Add random jitter to delays

# Caching and optimization configuration
cache:
  model_cache_ttl: "300s"         # Cache model lists for 5 minutes
  deduplication_ttl: "60s"        # Deduplicate requests for 1 minute
  enable_deduplication: true      # Enable request deduplication

# Logging configuration
logging:
  level: "info"                   # Log level: trace, debug, info, warn, error
  format: "json"                  # Log format: json, pretty
  enable_colors: false            # Enable colored output (for pretty format)

# Files API configuration
files:
  enabled: true                   # Enable/disable Files API endpoints
  max_file_size: 536870912        # Maximum file size in bytes (default: 512MB)
  storage_path: "./data/files"    # Storage path for uploaded files (supports ~)
  retention_days: 0               # File retention in days (0 = keep forever)
  metadata_storage: persistent    # Metadata backend: "memory" or "persistent" (default)
  cleanup_orphans_on_startup: false  # Auto-cleanup orphaned files on startup

  # Authentication and authorization
  auth:
    method: api_key               # "none" or "api_key" (default)
    required_scope: files         # API key scope required for access
    enforce_ownership: true       # Users can only access their own files
    admin_can_access_all: true    # Admin scope grants access to all files

# Load balancing configuration
load_balancer:
  strategy: "round_robin"         # Strategy: round_robin, weighted, random
  health_aware: true              # Only route to healthy backends

# Distributed tracing configuration
tracing:
  enabled: true                   # Enable/disable distributed tracing
  w3c_trace_context: true         # Support W3C Trace Context (traceparent header)
  headers:
    trace_id: "X-Trace-ID"        # Header name for trace ID
    request_id: "X-Request-ID"    # Header name for request ID
    correlation_id: "X-Correlation-ID"  # Header name for correlation ID

# Circuit breaker configuration (future feature)
circuit_breaker:
  enabled: false                  # Enable circuit breaker
  failure_threshold: 5            # Failures to open circuit
  recovery_timeout: "60s"         # Time before attempting recovery
  half_open_retries: 3            # Retries in half-open state

# Rate limiting configuration (future feature)
rate_limiting:
  enabled: false                  # Enable rate limiting
  requests_per_second: 100        # Global requests per second
  burst_size: 200                 # Burst capacity

# Admin API configuration
admin:
  auth:
    method: bearer_token          # Auth method: none, bearer_token, basic, api_key
    token: "${ADMIN_TOKEN}"       # Admin authentication token
  stats:
    enabled: true                 # Enable/disable stats collection
    retention_window: 24h         # Ring-buffer retention for windowed queries
    token_tracking: true          # Parse response bodies for token usage
    persistence:
      enabled: true               # Enable stats persistence across restarts
      path: ./data/stats.json     # File path for the snapshot
      snapshot_interval: 5m       # How often to write periodic snapshots
      max_age: 7d                 # Discard snapshots older than this on startup

# Metrics and monitoring configuration (future feature)
metrics:
  enabled: false                  # Enable metrics collection
  endpoint: "/metrics"            # Metrics endpoint path
  include_labels: true            # Include detailed labels
```
```yaml
# Minimal configuration - other settings will use defaults
server:
  bind_address: "0.0.0.0:8080"

backends:
  - name: "ollama"
    url: "http://localhost:11434"
  - name: "lm-studio"
    url: "http://localhost:1234"
```
```bash
# Use config file with overrides
continuum-router --config config.yaml --bind "0.0.0.0:9000"

# Override backends temporarily
continuum-router --config config.yaml --backends "http://localhost:11434"

# Use custom model metadata file
continuum-router --config config.yaml --model-metadata /path/to/custom-metadata.yaml
```
```bash
# Use model metadata with tilde expansion
continuum-router --model-metadata ~/configs/model-metadata.yaml
```
```bash
# Adjust health check settings for testing
continuum-router --config config.yaml --health-check-interval 10

# Generate sample configuration
continuum-router --generate-config > my-config.yaml
```