// vex_runtime/executor.rs

1//! Agent executor - runs individual agents with LLM backend
2
3use std::sync::Arc;
4use uuid::Uuid;
5
6use serde::Deserialize;
7use vex_adversarial::{
8    Consensus, ConsensusProtocol, Debate, DebateRound, ShadowAgent, ShadowConfig, Vote,
9};
10use vex_core::{Agent, ContextPacket, Hash};
11
/// Schema for the Red (shadow) agent's JSON verdict on a challenge prompt.
/// Parsed from the first `{` .. last `}` span of the raw LLM output.
#[derive(Debug, Deserialize)]
struct ChallengeResponse {
    /// True when the Red agent disputes the Blue agent's response.
    is_challenge: bool,
    /// Red agent's self-reported confidence in its verdict (0.0 - 1.0).
    confidence: f64,
    /// Free-text justification; recorded as the round's challenge text.
    reasoning: String,
    /// Optional replacement text offered by the Red agent.
    /// NOTE(review): parsed but currently unused downstream — confirm intent.
    suggested_revision: Option<String>,
}
19
20#[derive(Debug, Deserialize)]
21struct VoteResponse {
22    agrees: bool,
23    reflection: String,
24    confidence: f64,
25}
26
/// Configuration for agent execution
#[derive(Debug, Clone)]
pub struct ExecutorConfig {
    /// Maximum debate rounds (default: 3); the debate stops early as soon
    /// as the Red agent no longer challenges
    pub max_debate_rounds: u32,
    /// Consensus protocol to use (default: majority)
    /// NOTE(review): currently unread by the executor, which hardcodes
    /// `WeightedConfidence` during verification — confirm which should win
    pub consensus_protocol: ConsensusProtocol,
    /// Whether to spawn shadow agents for adversarial verification
    /// (default: true)
    pub enable_adversarial: bool,
}
37
38impl Default for ExecutorConfig {
39    fn default() -> Self {
40        Self {
41            max_debate_rounds: 3,
42            consensus_protocol: ConsensusProtocol::Majority,
43            enable_adversarial: true,
44        }
45    }
46}
47
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct ExecutionResult {
    /// The agent that produced this result
    pub agent_id: Uuid,
    /// The final response (the original answer, or the last rebuttal when
    /// consensus did not uphold the original)
    pub response: String,
    /// Whether it was verified by adversarial debate; always `false` when
    /// adversarial verification is disabled
    pub verified: bool,
    /// Confidence score (0.0 - 1.0); also written back to the agent's
    /// `fitness` and the context packet's `importance`
    pub confidence: f64,
    /// Context packet with merkle hash of the final response
    pub context: ContextPacket,
    /// Logit-Merkle trace root (for provenance), copied from
    /// `context.trace_root`
    pub trace_root: Option<Hash>,
    /// Debate details (`Some` only when adversarial verification ran)
    pub debate: Option<Debate>,
}
66
67use vex_llm::{LlmProvider, LlmRequest};
68
/// Agent executor - runs agents with LLM backends
///
/// Generic over the [`LlmProvider`] backend so callers (and tests, via a
/// mock provider) can inject any implementation. Cheap to clone: the
/// provider is shared behind an [`Arc`].
pub struct AgentExecutor<L: LlmProvider> {
    /// Configuration
    pub config: ExecutorConfig,
    /// LLM backend (shared, reference-counted)
    llm: Arc<L>,
}
76
77impl<L: LlmProvider> Clone for AgentExecutor<L> {
78    fn clone(&self) -> Self {
79        Self {
80            config: self.config.clone(),
81            llm: self.llm.clone(),
82        }
83    }
84}
85
86impl<L: LlmProvider> AgentExecutor<L> {
87    /// Create a new executor
88    pub fn new(llm: Arc<L>, config: ExecutorConfig) -> Self {
89        Self { config, llm }
90    }
91
92    /// Execute an agent with a prompt and return the result
93    pub async fn execute(
94        &self,
95        agent: &mut Agent,
96        prompt: &str,
97    ) -> Result<ExecutionResult, String> {
98        // Step 1: Format context and get initial response from Blue agent
99        let full_prompt = if !agent.context.content.is_empty() {
100            format!(
101                "Previous Context (Time: {}):\n\"{}\"\n\nActive Prompt:\n\"{}\"",
102                agent.context.created_at, agent.context.content, prompt
103            )
104        } else {
105            prompt.to_string()
106        };
107
108        let blue_response = self
109            .llm
110            .complete(LlmRequest::with_role(&agent.config.role, &full_prompt))
111            .await
112            .map_err(|e| e.to_string())?
113            .content;
114
115        // Step 2: If adversarial is enabled, run debate
116        let (final_response, verified, confidence, debate) = if self.config.enable_adversarial {
117            self.run_adversarial_verification(agent, prompt, &blue_response)
118                .await?
119        } else {
120            (blue_response, false, 0.5, None)
121        };
122
123        // Step 3: Create context packet with hash
124        let mut context = ContextPacket::new(&final_response);
125        context.source_agent = Some(agent.id);
126        context.importance = confidence;
127
128        // Step 4: Update agent's context
129        agent.context = context.clone();
130        agent.fitness = confidence;
131
132        Ok(ExecutionResult {
133            agent_id: agent.id,
134            response: final_response,
135            verified,
136            confidence,
137            trace_root: context.trace_root.clone(),
138            context,
139            debate,
140        })
141    }
142
143    /// Run adversarial verification with Red agent
144    async fn run_adversarial_verification(
145        &self,
146        blue_agent: &Agent,
147        _original_prompt: &str,
148        blue_response: &str,
149    ) -> Result<(String, bool, f64, Option<Debate>), String> {
150        // Create shadow agent
151        let shadow = ShadowAgent::new(blue_agent, ShadowConfig::default());
152
153        // Create debate
154        let mut debate = Debate::new(blue_agent.id, shadow.agent.id, blue_response);
155
156        // Initialize weighted consensus
157        let mut consensus = Consensus::new(ConsensusProtocol::WeightedConfidence);
158
159        // Run debate rounds
160        for round_num in 1..=self.config.max_debate_rounds {
161            // Red agent challenges
162            let mut challenge_prompt = shadow.challenge_prompt(blue_response);
163            challenge_prompt.push_str("\n\nIMPORTANT: Respond in valid JSON format: {\"is_challenge\": boolean, \"confidence\": float (0.0-1.0), \"reasoning\": \"string\", \"suggested_revision\": \"string\" | null}. If you agree with the statement, set is_challenge to false.");
164
165            let red_output = self
166                .llm
167                .complete(LlmRequest::with_role(
168                    &shadow.agent.config.role,
169                    &challenge_prompt,
170                ))
171                .await
172                .map_err(|e| e.to_string())?
173                .content;
174
175            // Try to parse JSON response
176            let (is_challenge, red_confidence, red_reasoning, _suggested_revision) =
177                if let Some(start) = red_output.find('{') {
178                    if let Some(end) = red_output.rfind('}') {
179                        if let Ok(res) =
180                            serde_json::from_str::<ChallengeResponse>(&red_output[start..=end])
181                        {
182                            (
183                                res.is_challenge,
184                                res.confidence,
185                                res.reasoning,
186                                res.suggested_revision,
187                            )
188                        } else {
189                            (
190                                red_output.to_lowercase().contains("disagree"),
191                                0.5,
192                                red_output.clone(),
193                                None,
194                            )
195                        }
196                    } else {
197                        (false, 0.0, "Parsing failed".to_string(), None)
198                    }
199                } else {
200                    (false, 0.0, "No JSON found".to_string(), None)
201                };
202
203            let rebuttal = if is_challenge {
204                let rebuttal_prompt = format!(
205                    "Your previous response was challenged by a Red agent:\n\n\
206                     Original: \"{}\"\n\n\
207                     Challenge: \"{}\"\n\n\
208                     Please address these concerns or provide a revised response.",
209                    blue_response, red_reasoning
210                );
211                Some(
212                    self.llm
213                        .complete(LlmRequest::with_role(
214                            &blue_agent.config.role,
215                            &rebuttal_prompt,
216                        ))
217                        .await
218                        .map_err(|e| e.to_string())?
219                        .content,
220                )
221            } else {
222                None
223            };
224
225            debate.add_round(DebateRound {
226                round: round_num,
227                blue_claim: blue_response.to_string(),
228                red_challenge: red_reasoning.clone(),
229                blue_rebuttal: rebuttal,
230            });
231
232            // Vote: Red votes based on whether it found a challenge
233            consensus.add_vote(Vote {
234                agent_id: shadow.agent.id,
235                agrees: !is_challenge,
236                confidence: red_confidence,
237                reasoning: Some(red_reasoning),
238            });
239
240            if !is_challenge {
241                break;
242            }
243        }
244
245        // Blue agent reflects on the debate and decides its final vote (Fix for #3 bias)
246        let mut reflection_prompt = format!(
247            "You have just finished an adversarial debate about your original response.\n\n\
248             Original Response: \"{}\"\n\n\
249             Debate Rounds:\n",
250            blue_response
251        );
252
253        for (i, round) in debate.rounds.iter().enumerate() {
254            reflection_prompt.push_str(&format!(
255                "Round {}: Red challenged: \"{}\" -> You rebutted: \"{}\"\n",
256                i + 1,
257                round.red_challenge,
258                round.blue_rebuttal.as_deref().unwrap_or("N/A")
259            ));
260        }
261
262        reflection_prompt.push_str("\nBased on this debate, do you still stand by your original response? \
263                                    Respond in valid JSON: {\"agrees\": boolean, \"confidence\": float (0.0-1.0), \"reasoning\": \"string\"}.");
264
265        let blue_vote_res = self
266            .llm
267            .complete(LlmRequest::with_role(
268                &blue_agent.config.role,
269                &reflection_prompt,
270            ))
271            .await;
272
273        let (blue_agrees, blue_confidence, blue_reasoning) = if let Ok(resp) = blue_vote_res {
274            if let Some(start) = resp.content.find('{') {
275                if let Some(end) = resp.content.rfind('}') {
276                    if let Ok(vote) =
277                        serde_json::from_str::<VoteResponse>(&resp.content[start..=end])
278                    {
279                        (vote.agrees, vote.confidence, vote.reflection)
280                    } else {
281                        (
282                            true,
283                            blue_agent.fitness.max(0.5f64),
284                            "Failed to parse reflection JSON".to_string(),
285                        )
286                    }
287                } else {
288                    (
289                        true,
290                        blue_agent.fitness.max(0.5f64),
291                        "No JSON in reflection".to_string(),
292                    )
293                }
294            } else {
295                (
296                    true,
297                    blue_agent.fitness.max(0.5f64),
298                    "No reflection content".to_string(),
299                )
300            }
301        } else {
302            (
303                true,
304                blue_agent.fitness.max(0.5f64),
305                "Reflection LLM call failed".to_string(),
306            )
307        };
308
309        consensus.add_vote(Vote {
310            agent_id: blue_agent.id,
311            agrees: blue_agrees,
312            confidence: blue_confidence.max(0.5f64),
313            reasoning: Some(blue_reasoning),
314        });
315
316        consensus.evaluate();
317
318        // Determine final response
319        let final_response = if consensus.reached && consensus.decision == Some(true) {
320            blue_response.to_string()
321        } else if let Some(last_round) = debate.rounds.last() {
322            // Use rebuttal if available, otherwise original
323            last_round
324                .blue_rebuttal
325                .clone()
326                .unwrap_or_else(|| blue_response.to_string())
327        } else {
328            blue_response.to_string()
329        };
330
331        let verified = consensus.reached;
332        let confidence = consensus.confidence;
333
334        debate.conclude(consensus.decision.unwrap_or(true), confidence);
335
336        Ok((final_response, verified, confidence, Some(debate)))
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::*;
    use vex_core::AgentConfig;

    /// Smoke test: a full execute() round-trip against the mock provider
    /// must yield a non-empty, verified response.
    #[tokio::test]
    async fn test_executor() {
        use vex_llm::MockProvider;

        let provider = Arc::new(MockProvider::smart());
        let mut agent = Agent::new(AgentConfig::default());
        let exec = AgentExecutor::new(provider, ExecutorConfig::default());

        let result = exec.execute(&mut agent, "Test prompt").await.unwrap();

        assert!(!result.response.is_empty());
        assert!(result.verified);
    }
}
356}