// vex_router/config/mod.rs

//! Configuration module for SmartRouter

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Routing strategy used to select a model for each request.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RoutingStrategy {
    /// Automatically pick the best model (the default strategy).
    #[default]
    Auto,
    /// Minimize cost while maintaining the configured quality threshold.
    CostOptimized,
    /// Maximize quality, ignoring cost.
    QualityOptimized,
    /// Minimize latency.
    LatencyOptimized,
    /// Balanced trade-off between cost, quality, and latency.
    Balanced,
    /// Apply user-defined routing rules.
    Custom,
}
24
/// Configuration and routing metadata for a specific model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelConfig {
    /// Model identifier (e.g., "gpt-4o", "claude-3-haiku").
    pub id: String,
    /// Human-readable display name.
    pub name: String,
    /// Cost per 1M input tokens — NOTE(review): currency unit not stated here; presumably USD, confirm.
    pub input_cost: f64,
    /// Cost per 1M output tokens (same unit as `input_cost`).
    pub output_cost: f64,
    /// Average latency in milliseconds.
    pub latency_ms: u64,
    /// Quality score; expected range 0.0–1.0.
    pub quality_score: f64,
    /// Capabilities this model excels at.
    pub capabilities: Vec<ModelCapability>,
    /// Is this a premium model (used as fallback)?
    pub is_premium: bool,
}
45
46/// Model capability tags
47#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum ModelCapability {
50    Code,
51    Reasoning,
52    Creative,
53    Math,
54    Analysis,
55    Summarization,
56    Translation,
57    Chat,
58    General,
59}
60
/// Main SmartRouter configuration, typically loaded at startup.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// HTTP server configuration.
    pub server: ServerConfig,
    /// Available models the router may choose from.
    pub models: Vec<ModelConfig>,
    /// Default routing strategy when a request does not specify one.
    pub default_strategy: RoutingStrategy,
    /// API keys for model providers, keyed by provider name.
    /// NOTE(review): stored as plain strings here — "encrypted in production"
    /// per the original comment, but no encryption is visible in this module.
    pub api_keys: HashMap<String, String>,
    /// Quality threshold (0–1) for cost-optimized routing.
    pub quality_threshold: f64,
    /// Maximum cost per 1K tokens allowed.
    pub max_cost_per_1k: f64,
    /// Maximum latency allowed, in milliseconds.
    pub max_latency_ms: u64,
    /// Enable the learning system.
    pub learning_enabled: bool,
    /// Cache responses.
    pub cache_enabled: bool,
    /// Rate limit configuration.
    pub rate_limit: RateLimitConfig,
}
85
/// HTTP server bind and concurrency settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
    /// Address to bind to (e.g., "0.0.0.0").
    pub host: String,
    /// TCP port to listen on.
    pub port: u16,
    /// Number of worker threads/processes — NOTE(review): which one depends on the server runtime, confirm at the usage site.
    pub workers: usize,
}
92
/// Request rate limits applied by the router.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateLimitConfig {
    /// Maximum requests allowed per minute.
    pub requests_per_minute: u32,
    /// Maximum requests allowed per day.
    pub requests_per_day: u32,
}
98
99impl Default for Config {
100    fn default() -> Self {
101        Self {
102            server: ServerConfig {
103                host: "0.0.0.0".to_string(),
104                port: 3000,
105                workers: 4,
106            },
107            models: default_models(),
108            default_strategy: RoutingStrategy::Auto,
109            api_keys: HashMap::new(),
110            quality_threshold: 0.85,
111            max_cost_per_1k: 1.0,
112            max_latency_ms: 5000,
113            learning_enabled: true,
114            cache_enabled: true,
115            rate_limit: RateLimitConfig {
116                requests_per_minute: 1000,
117                requests_per_day: 100000,
118            },
119        }
120    }
121}
122
123pub fn default_models() -> Vec<ModelConfig> {
124    vec![
125        ModelConfig {
126            id: "gpt-4o".to_string(),
127            name: "GPT-4o".to_string(),
128            input_cost: 15.0,
129            output_cost: 15.0,
130            latency_ms: 3000,
131            quality_score: 0.98,
132            capabilities: vec![
133                ModelCapability::Reasoning,
134                ModelCapability::Code,
135                ModelCapability::Creative,
136                ModelCapability::Analysis,
137            ],
138            is_premium: true,
139        },
140        ModelConfig {
141            id: "gpt-4o-mini".to_string(),
142            name: "GPT-4o Mini".to_string(),
143            input_cost: 0.60,
144            output_cost: 0.60,
145            latency_ms: 1000,
146            quality_score: 0.85,
147            capabilities: vec![
148                ModelCapability::Chat,
149                ModelCapability::Summarization,
150                ModelCapability::General,
151            ],
152            is_premium: false,
153        },
154        ModelConfig {
155            id: "claude-3-5-sonnet".to_string(),
156            name: "Claude 3.5 Sonnet".to_string(),
157            input_cost: 3.0,
158            output_cost: 15.0,
159            latency_ms: 2500,
160            quality_score: 0.97,
161            capabilities: vec![
162                ModelCapability::Reasoning,
163                ModelCapability::Creative,
164                ModelCapability::Analysis,
165            ],
166            is_premium: true,
167        },
168        ModelConfig {
169            id: "claude-3-haiku".to_string(),
170            name: "Claude 3 Haiku".to_string(),
171            input_cost: 0.25,
172            output_cost: 1.25,
173            latency_ms: 800,
174            quality_score: 0.82,
175            capabilities: vec![
176                ModelCapability::Chat,
177                ModelCapability::Summarization,
178                ModelCapability::General,
179            ],
180            is_premium: false,
181        },
182        ModelConfig {
183            id: "llama-3-70b".to_string(),
184            name: "Llama 3 70B".to_string(),
185            input_cost: 0.90,
186            output_cost: 0.90,
187            latency_ms: 4000,
188            quality_score: 0.88,
189            capabilities: vec![ModelCapability::Code, ModelCapability::General],
190            is_premium: false,
191        },
192    ]
193}