1use async_trait::async_trait;
4use serde::{Deserialize, Serialize};
5use thiserror::Error;
6
/// Errors surfaced by [`LlmProvider`] and [`EmbeddingProvider`] implementations.
#[derive(Debug, Error)]
pub enum LlmError {
    /// Could not reach the provider backend; payload is a human-readable reason.
    #[error("Connection failed: {0}")]
    ConnectionFailed(String),
    /// The backend was reached but the request itself failed.
    #[error("Request failed: {0}")]
    RequestFailed(String),
    /// The backend answered, but the payload could not be interpreted.
    #[error("Invalid response: {0}")]
    InvalidResponse(String),
    /// The backend rejected the call due to rate limiting.
    #[error("Rate limited")]
    RateLimited,
    /// The provider is not currently usable (e.g. not configured or offline).
    #[error("Provider not available")]
    NotAvailable,
    /// An input exceeded a size limit: actual size, then the maximum, in bytes.
    #[error("Input too large: {0} bytes exceeds maximum {1} bytes")]
    InputTooLarge(usize, usize),
}
23
/// Maximum accepted prompt size in bytes (100 KiB); enforced by [`LlmRequest::validate`].
pub const MAX_PROMPT_SIZE: usize = 100 * 1024;
/// Maximum accepted system-message size in bytes (10 KiB); enforced by [`LlmRequest::validate`].
pub const MAX_SYSTEM_SIZE: usize = 10 * 1024;

/// A single completion request handed to an [`LlmProvider`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmRequest {
    /// Optional tenant identifier; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tenant_id: Option<String>,
    /// System message that establishes the assistant's role.
    pub system: String,
    /// The user prompt text.
    pub prompt: String,
    /// Sampling temperature forwarded to the provider.
    pub temperature: f32,
    /// Upper bound on the number of tokens the provider may generate.
    pub max_tokens: u32,
}
44
45impl LlmRequest {
46 pub fn simple(prompt: &str) -> Self {
48 Self {
49 tenant_id: None,
50 system: "You are a helpful assistant.".to_string(),
51 prompt: prompt.to_string(),
52 temperature: 0.7,
53 max_tokens: 1024,
54 }
55 }
56
57 pub fn with_role(system: &str, prompt: &str) -> Self {
59 Self {
60 system: system.to_string(),
61 prompt: prompt.to_string(),
62 temperature: 0.7,
63 max_tokens: 1024,
64 tenant_id: None,
65 }
66 }
67
68 pub fn validate(&self) -> Result<(), LlmError> {
70 if self.prompt.len() > MAX_PROMPT_SIZE {
71 return Err(LlmError::InputTooLarge(self.prompt.len(), MAX_PROMPT_SIZE));
72 }
73 if self.system.len() > MAX_SYSTEM_SIZE {
74 return Err(LlmError::InputTooLarge(self.system.len(), MAX_SYSTEM_SIZE));
75 }
76 Ok(())
77 }
78}
79
/// A completed response returned by an [`LlmProvider`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
    /// The generated text.
    pub content: String,
    /// Identifier of the model that produced the response.
    pub model: String,
    /// Token count for the call, when the provider reports one.
    // NOTE(review): unlike `trace_root`, this Option serializes as `null`
    // when absent — confirm whether consumers rely on that before adding
    // `skip_serializing_if` here for consistency.
    pub tokens_used: Option<u32>,
    /// Latency of the provider call, in milliseconds.
    pub latency_ms: u64,
    /// Optional trace identifier; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trace_root: Option<String>,
}
95
96#[async_trait]
98pub trait LlmProvider: Send + Sync + std::fmt::Debug {
99 fn name(&self) -> &str;
101
102 async fn is_available(&self) -> bool;
104
105 async fn complete(&self, request: LlmRequest) -> Result<LlmResponse, LlmError>;
107
108 async fn ask(&self, prompt: &str) -> Result<String, LlmError> {
110 let response = self.complete(LlmRequest::simple(prompt)).await?;
111 Ok(response.content)
112 }
113}
114
/// Abstraction over a text-embedding backend.
#[async_trait]
pub trait EmbeddingProvider: Send + Sync + std::fmt::Debug {
    /// Computes an embedding vector for `text`.
    ///
    /// # Errors
    ///
    /// Returns an [`LlmError`] when the provider call fails.
    async fn embed(&self, text: &str) -> Result<Vec<f32>, LlmError>;
}