vex_router/observability/
mod.rs1use chrono::Utc;
4use parking_lot::RwLock;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::sync::Arc;
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct RequestMetrics {
11 pub request_id: String,
12 pub timestamp: i64,
13 pub model_used: String,
14 pub routing_strategy: String,
15 pub complexity_score: f64,
16 pub tokens_input: u32,
17 pub tokens_output: u32,
18 pub cost_usd: f64,
19 pub latency_ms: u64,
20 pub first_token_ms: Option<u64>,
21 pub cache_hit: bool,
22 pub cache_similarity: Option<f32>,
23 pub compression_ratio: Option<f64>,
24 pub guardrails_passed: bool,
25 pub error: Option<String>,
26}
27
28impl RequestMetrics {
29 pub fn new(request_id: String, model_used: String, routing_strategy: String) -> Self {
30 Self {
31 request_id,
32 timestamp: Utc::now().timestamp(),
33 model_used,
34 routing_strategy,
35 complexity_score: 0.0,
36 tokens_input: 0,
37 tokens_output: 0,
38 cost_usd: 0.0,
39 latency_ms: 0,
40 first_token_ms: None,
41 cache_hit: false,
42 cache_similarity: None,
43 compression_ratio: None,
44 guardrails_passed: true,
45 error: None,
46 }
47 }
48}
49
50#[derive(Debug)]
51pub struct Observability {
52 metrics: Arc<RwLock<Vec<RequestMetrics>>>,
53 daily_stats: Arc<RwLock<DailyStats>>,
54 max_metrics_stored: usize,
55}
56
57impl Observability {
58 pub fn new(max_metrics_stored: usize) -> Self {
59 Self {
60 metrics: Arc::new(RwLock::new(Vec::new())),
61 daily_stats: Arc::new(RwLock::new(DailyStats::new())),
62 max_metrics_stored,
63 }
64 }
65
66 pub fn record(&self, metric: RequestMetrics) {
67 let mut metrics = self.metrics.write();
68
69 if metrics.len() >= self.max_metrics_stored {
70 metrics.remove(0);
71 }
72
73 metrics.push(metric.clone());
74
75 let mut daily = self.daily_stats.write();
76 daily.record(&metric);
77 }
78
79 pub fn get_metrics(&self, limit: usize) -> Vec<RequestMetrics> {
80 let metrics = self.metrics.read();
81 metrics.iter().rev().take(limit).cloned().collect()
82 }
83
84 pub fn get_summary(&self) -> ObservabilitySummary {
85 let metrics = self.metrics.read();
86
87 if metrics.is_empty() {
88 return ObservabilitySummary::default();
89 }
90
91 let total_requests = metrics.len();
92 let total_cost: f64 = metrics.iter().map(|m| m.cost_usd).sum();
93 let total_tokens_input: u64 = metrics.iter().map(|m| m.tokens_input as u64).sum();
94 let total_tokens_output: u64 = metrics.iter().map(|m| m.tokens_output as u64).sum();
95 let cache_hits = metrics.iter().filter(|m| m.cache_hit).count();
96 let errors = metrics.iter().filter(|m| m.error.is_some()).count();
97
98 let mut latencies: Vec<u64> = metrics.iter().map(|m| m.latency_ms).collect();
99 latencies.sort();
100
101 let avg_latency = latencies.iter().sum::<u64>() as f64 / latencies.len() as f64;
102 let p50_latency = latencies[latencies.len() / 2];
103 let p95_latency = latencies[(latencies.len() * 95) / 100];
104 let p99_latency = latencies[(latencies.len() * 99) / 100];
105
106 ObservabilitySummary {
107 total_requests,
108 total_cost_usd: total_cost,
109 total_tokens_input: total_tokens_input as u32,
110 total_tokens_output: total_tokens_output as u32,
111 avg_cost_per_request: total_cost / total_requests as f64,
112 avg_latency_ms: avg_latency,
113 p50_latency_ms: p50_latency,
114 p95_latency_ms: p95_latency,
115 p99_latency_ms: p99_latency,
116 cache_hit_rate: cache_hits as f64 / total_requests as f64,
117 error_rate: errors as f64 / total_requests as f64,
118 }
119 }
120
121 pub fn get_cost_by_model(&self) -> HashMap<String, f64> {
122 let metrics = self.metrics.read();
123 let mut costs: HashMap<String, f64> = HashMap::new();
124
125 for m in metrics.iter() {
126 *costs.entry(m.model_used.clone()).or_insert(0.0) += m.cost_usd;
127 }
128
129 costs
130 }
131
132 pub fn get_savings(&self) -> SavingsReport {
133 let metrics = self.metrics.read();
134
135 let baseline_cost: f64 = metrics
136 .iter()
137 .map(|m| {
138 m.tokens_input as f64 * 15.0 / 1_000_000.0
139 + m.tokens_output as f64 * 15.0 / 1_000_000.0
140 })
141 .sum();
142
143 let actual_cost: f64 = metrics.iter().map(|m| m.cost_usd).sum();
144
145 let routing_savings = baseline_cost * 0.6;
146 let cache_savings = baseline_cost * metrics.iter().filter(|m| m.cache_hit).count() as f64
147 / metrics.len().max(1) as f64;
148 let compression_savings = baseline_cost
149 * metrics
150 .iter()
151 .filter_map(|m| m.compression_ratio)
152 .sum::<f64>()
153 / metrics.len().max(1) as f64;
154
155 SavingsReport {
156 baseline_cost,
157 actual_cost,
158 total_savings: baseline_cost - actual_cost,
159 savings_percentage: if baseline_cost > 0.0 {
160 (baseline_cost - actual_cost) / baseline_cost * 100.0
161 } else {
162 0.0
163 },
164 routing_savings,
165 cache_savings,
166 compression_savings,
167 }
168 }
169
170 pub fn clear(&self) {
171 let mut metrics = self.metrics.write();
172 metrics.clear();
173
174 let mut daily = self.daily_stats.write();
175 *daily = DailyStats::new();
176 }
177}
178
179impl Default for Observability {
180 fn default() -> Self {
181 Self::new(10000)
182 }
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize, Default, utoipa::ToSchema)]
186pub struct ObservabilitySummary {
187 pub total_requests: usize,
188 pub total_cost_usd: f64,
189 pub total_tokens_input: u32,
190 pub total_tokens_output: u32,
191 pub avg_cost_per_request: f64,
192 pub avg_latency_ms: f64,
193 pub p50_latency_ms: u64,
194 pub p95_latency_ms: u64,
195 pub p99_latency_ms: u64,
196 pub cache_hit_rate: f64,
197 pub error_rate: f64,
198}
199
200#[derive(Debug, Clone, Serialize, Deserialize, Default, utoipa::ToSchema)]
201pub struct SavingsReport {
202 pub baseline_cost: f64,
203 pub actual_cost: f64,
204 pub total_savings: f64,
205 pub savings_percentage: f64,
206 pub routing_savings: f64,
207 pub cache_savings: f64,
208 pub compression_savings: f64,
209}
210
/// Running totals for a single UTC calendar day.
#[derive(Debug)]
struct DailyStats {
    /// Day these totals belong to, formatted as `%Y-%m-%d` (UTC).
    date: String,
    /// Number of requests recorded on `date`.
    total_requests: u64,
    /// Sum of request costs recorded on `date`.
    total_cost: f64,
    /// Sum of input plus output tokens recorded on `date`.
    total_tokens: u64,
    /// Number of recorded requests that carried an error.
    errors: u64,
}
219
220impl DailyStats {
221 fn new() -> Self {
222 Self {
223 date: Utc::now().format("%Y-%m-%d").to_string(),
224 total_requests: 0,
225 total_cost: 0.0,
226 total_tokens: 0,
227 errors: 0,
228 }
229 }
230
231 fn record(&mut self, metric: &RequestMetrics) {
232 let today = Utc::now().format("%Y-%m-%d").to_string();
233
234 if self.date != today {
235 *self = Self::new();
236 self.date = today;
237 }
238
239 self.total_requests += 1;
240 self.total_cost += metric.cost_usd;
241 self.total_tokens += (metric.tokens_input + metric.tokens_output) as u64;
242
243 if metric.error.is_some() {
244 self.errors += 1;
245 }
246 }
247}
248
/// Computes the cost of `tokens` tokens at a per-million-token price.
///
/// `is_output` selects which price applies: `output_cost_per_million` when
/// `true`, `input_cost_per_million` when `false`. The other price is ignored.
pub fn calculate_cost(
    tokens: u32,
    input_cost_per_million: f64,
    output_cost_per_million: f64,
    is_output: bool,
) -> f64 {
    let price_per_million = if is_output {
        output_cost_per_million
    } else {
        input_cost_per_million
    };

    f64::from(tokens) * price_per_million / 1_000_000.0
}