1use serde::{Deserialize, Serialize};
4
5#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
6#[serde(rename_all = "snake_case")]
7pub enum CompressionLevel {
8 #[default]
9 None,
10 Light,
11 Balanced,
12 Aggressive,
13}
14
15pub struct PromptCompressor {
16 level: CompressionLevel,
17 stop_words: Vec<&'static str>,
18}
19
20impl PromptCompressor {
21 pub fn new(level: CompressionLevel) -> Self {
22 let stop_words = vec![
23 "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", "have", "has",
24 "had", "do", "does", "did", "will", "would", "could", "should", "may", "might", "must",
25 "shall", "can", "need", "dare", "to", "of", "in", "for", "on", "with", "at", "by",
26 "from", "as", "into", "through", "during", "before", "after", "above", "below", "and",
27 "but", "or", "nor", "so", "yet", "both", "either", "neither",
28 ];
29
30 Self { level, stop_words }
31 }
32
33 pub fn compress(&self, prompt: &str) -> CompressedPrompt {
34 match self.level {
35 CompressionLevel::None => CompressedPrompt {
36 original: prompt.to_string(),
37 compressed: prompt.to_string(),
38 original_tokens: self.estimate_tokens(prompt),
39 compressed_tokens: self.estimate_tokens(prompt),
40 compression_ratio: 0.0,
41 },
42 CompressionLevel::Light => self.compress_light(prompt),
43 CompressionLevel::Balanced => self.compress_balanced(prompt),
44 CompressionLevel::Aggressive => self.compress_aggressive(prompt),
45 }
46 }
47
48 fn compress_light(&self, prompt: &str) -> CompressedPrompt {
49 let mut compressed = prompt.to_string();
50
51 compressed = self.remove_extra_whitespace(&compressed);
52 compressed = self.remove_filler_phrases(&compressed);
53
54 let original_tokens = self.estimate_tokens(prompt);
55 let compressed_tokens = self.estimate_tokens(&compressed);
56 let ratio = if original_tokens > 0 {
57 (original_tokens - compressed_tokens) as f64 / original_tokens as f64
58 } else {
59 0.0
60 };
61
62 CompressedPrompt {
63 original: prompt.to_string(),
64 compressed,
65 original_tokens,
66 compressed_tokens,
67 compression_ratio: ratio,
68 }
69 }
70
71 fn compress_balanced(&self, prompt: &str) -> CompressedPrompt {
72 let mut compressed = prompt.to_string();
73
74 compressed = self.remove_extra_whitespace(&compressed);
75 compressed = self.remove_filler_phrases(&compressed);
76 compressed = self.shorten_sentences(&compressed);
77 compressed = self.remove_redundant_words(&compressed);
78
79 let original_tokens = self.estimate_tokens(prompt);
80 let compressed_tokens = self.estimate_tokens(&compressed);
81 let ratio = if original_tokens > 0 {
82 (original_tokens - compressed_tokens) as f64 / original_tokens as f64
83 } else {
84 0.0
85 };
86
87 CompressedPrompt {
88 original: prompt.to_string(),
89 compressed,
90 original_tokens,
91 compressed_tokens,
92 compression_ratio: ratio,
93 }
94 }
95
96 fn compress_aggressive(&self, prompt: &str) -> CompressedPrompt {
97 let mut compressed = prompt.to_string();
98
99 compressed = self.remove_extra_whitespace(&compressed);
100 compressed = self.remove_filler_phrases(&compressed);
101 compressed = self.shorten_sentences(&compressed);
102 compressed = self.remove_redundant_words(&compressed);
103 compressed = self.extract_key_information(&compressed);
104
105 let original_tokens = self.estimate_tokens(prompt);
106 let compressed_tokens = self.estimate_tokens(&compressed);
107 let ratio = if original_tokens > 0 {
108 (original_tokens - compressed_tokens) as f64 / original_tokens as f64
109 } else {
110 0.0
111 };
112
113 CompressedPrompt {
114 original: prompt.to_string(),
115 compressed,
116 original_tokens,
117 compressed_tokens,
118 compression_ratio: ratio,
119 }
120 }
121
122 fn remove_extra_whitespace(&self, text: &str) -> String {
123 text.split_whitespace().collect::<Vec<_>>().join(" ")
124 }
125
126 fn remove_filler_phrases(&self, text: &str) -> String {
127 let fillers = [
128 "please ",
129 "kindly ",
130 "basically ",
131 "actually ",
132 "literally ",
133 "really ",
134 "just ",
135 "simply ",
136 "of course ",
137 "as you know ",
138 "you see ",
139 "i was wondering ",
140 "i wanted to ask ",
141 "if you could ",
142 "if possible ",
143 ];
144
145 let mut result = text.to_string();
146 for filler in fillers {
147 result = result.to_lowercase().replace(filler, "");
148 }
149
150 result
151 }
152
153 fn shorten_sentences(&self, text: &str) -> String {
154 let abbreviations = [
155 ("information", "info"),
156 ("because", "bc"),
157 ("without", "w/o"),
158 ("with", "w/"),
159 ("through", "thru"),
160 ("approximately", "approx"),
161 ("different", "diff"),
162 ("example", "ex"),
163 ("question", "q"),
164 ("answer", "a"),
165 ("number", "num"),
166 ];
167
168 let mut result = text.to_string();
169 for (long, short) in abbreviations {
170 result = result.replace(&format!(" {} ", long), &format!(" {} ", short));
171 result = result.replace(&format!("{} ", long), &format!("{} ", short));
172 }
173
174 result
175 }
176
177 fn remove_redundant_words(&self, text: &str) -> String {
178 let words: Vec<&str> = text.split_whitespace().collect();
179 let mut result = Vec::new();
180
181 for word in words {
182 let word_lower = word.to_lowercase();
183 let is_stop = self.stop_words.contains(&word_lower.as_str());
184
185 if !is_stop {
186 result.push(word);
187 }
188 }
189
190 result.join(" ")
191 }
192
193 fn extract_key_information(&self, text: &str) -> String {
194 let sentences: Vec<&str> = text
195 .split(&['.', '!', '?'][..])
196 .filter(|s| !s.trim().is_empty())
197 .collect();
198
199 if sentences.len() <= 2 {
200 return text.to_string();
201 }
202
203 let important_markers = [
204 "important",
205 "critical",
206 "key",
207 "must",
208 "require",
209 "need",
210 "task",
211 "goal",
212 "create",
213 "build",
214 "make",
215 "write",
216 "find",
217 "get",
218 "calculate",
219 "solve",
220 ];
221
222 let mut important_sentences = Vec::new();
223
224 for sentence in sentences {
225 let sentence_lower = sentence.to_lowercase();
226 if important_markers.iter().any(|m| sentence_lower.contains(m)) {
227 important_sentences.push(sentence.trim());
228 }
229 }
230
231 if important_sentences.is_empty() {
232 important_sentences
233 .iter()
234 .take(2)
235 .copied()
236 .collect::<Vec<_>>()
237 .join(". ")
238 } else {
239 important_sentences.join(". ")
240 }
241 }
242
243 fn estimate_tokens(&self, text: &str) -> u32 {
244 (text.split_whitespace().count() as f64 * 1.3) as u32
245 }
246}
247
248impl Default for PromptCompressor {
249 fn default() -> Self {
250 Self::new(CompressionLevel::None)
251 }
252}
253
254#[derive(Debug, Clone, Serialize, Deserialize)]
255pub struct CompressedPrompt {
256 pub original: String,
257 pub compressed: String,
258 pub original_tokens: u32,
259 pub compressed_tokens: u32,
260 pub compression_ratio: f64,
261}