logicaffeine_language/
lexicon.rs1include!(concat!(env!("OUT_DIR"), "/lexicon_data.rs"));
8
9pub use logicaffeine_lexicon::{
12 Aspect, Case, Definiteness, Feature, Gender, Number, Sort, Time, VerbClass,
13 AdjectiveMetadata, MorphologicalRule, NounMetadata, VerbEntry, VerbMetadata,
14};
15
16pub fn get_canonical_verb(lemma: &str) -> Option<(&'static str, bool)> {
20 lookup_canonical(lemma).map(|m| (m.lemma, m.polarity == Polarity::Negative))
21}
22
23pub trait LexiconTrait {
25 fn lookup_verb(&self, word: &str) -> Option<VerbMetadata>;
26 fn lookup_noun(&self, word: &str) -> Option<NounMetadata>;
27 fn lookup_adjective(&self, word: &str) -> Option<AdjectiveMetadata>;
28}
29
/// Zero-sized [`LexiconTrait`] implementor that forwards every lookup to the
/// `lookup_*_db` functions.
pub struct StaticLexicon;
32
33impl LexiconTrait for StaticLexicon {
34 fn lookup_verb(&self, word: &str) -> Option<VerbMetadata> {
35 lookup_verb_db(word)
36 }
37
38 fn lookup_noun(&self, word: &str) -> Option<NounMetadata> {
39 lookup_noun_db(word)
40 }
41
42 fn lookup_adjective(&self, word: &str) -> Option<AdjectiveMetadata> {
43 lookup_adjective_db(word)
44 }
45}
46
/// Field-less handle whose methods perform rule-based verb analysis
/// (see `Lexicon::lookup_verb`).
pub struct Lexicon {}
49
50impl Lexicon {
51 pub fn new() -> Self {
52 Lexicon {}
53 }
54
55 pub fn lookup_verb(&self, word: &str) -> Option<VerbEntry> {
56 let lower = word.to_lowercase();
57
58 if let Some(entry) = lookup_irregular_verb(&lower) {
59 return Some(entry);
60 }
61
62 if lower.ends_with("ing") {
63 let stem = self.strip_ing(&lower);
64 let lemma = Self::capitalize(&stem);
65 let class = self.lookup_verb_class(&lemma.to_lowercase());
66 return Some(VerbEntry {
67 lemma,
68 time: Time::None,
69 aspect: Aspect::Progressive,
70 class,
71 });
72 }
73
74 if lower.ends_with("ed") {
75 let stem = self.strip_ed(&lower);
76 if !is_base_verb(&stem) {
79 return None;
80 }
81 let lemma = Self::capitalize(&stem);
82 let class = self.lookup_verb_class(&lemma.to_lowercase());
83 return Some(VerbEntry {
84 lemma,
85 time: Time::Past,
86 aspect: Aspect::Simple,
87 class,
88 });
89 }
90
91 let is_third_person = if lower.ends_with("es") && lower.len() > 2 {
92 true
93 } else if lower.ends_with("s") && !lower.ends_with("ss") && lower.len() > 2 {
94 true
95 } else {
96 false
97 };
98
99 if is_third_person {
100 if is_stemming_exception(&lower) {
101 return None;
102 }
103
104 let stem = self.strip_s(&lower);
105 if !is_base_verb(&stem) {
106 return None;
107 }
108 let lemma = Self::capitalize(&stem);
109 let class = self.lookup_verb_class(&lemma.to_lowercase());
110 return Some(VerbEntry {
111 lemma,
112 time: Time::Present,
113 aspect: Aspect::Simple,
114 class,
115 });
116 }
117
118 if is_base_verb(&lower) {
120 let lemma = Self::capitalize(&lower);
121 let class = self.lookup_verb_class(&lower);
122 return Some(VerbEntry {
123 lemma,
124 time: Time::Present,
125 aspect: Aspect::Simple,
126 class,
127 });
128 }
129
130 None
131 }
132
133 fn lookup_verb_class(&self, lemma: &str) -> VerbClass {
134 lookup_verb_class(lemma)
135 }
136
137 fn strip_ing(&self, word: &str) -> String {
138 let base = &word[..word.len() - 3];
139
140 if base.len() >= 2 {
141 let chars: Vec<char> = base.chars().collect();
142 let last = chars[chars.len() - 1];
143 let second_last = chars[chars.len() - 2];
144
145 if last == second_last && !"aeiou".contains(last) {
146 return base[..base.len() - 1].to_string();
147 }
148 }
149
150 if needs_e_ing(base) {
151 return format!("{}e", base);
152 }
153
154 base.to_string()
155 }
156
157 fn strip_ed(&self, word: &str) -> String {
158 let base = &word[..word.len() - 2];
159
160 if base.ends_with("i") {
161 return format!("{}y", &base[..base.len() - 1]);
162 }
163
164 if base.len() >= 2 {
165 let chars: Vec<char> = base.chars().collect();
166 let last = chars[chars.len() - 1];
167 let second_last = chars[chars.len() - 2];
168
169 if last == second_last && !"aeiou".contains(last) {
173 if is_base_verb(base) {
175 return base.to_string();
176 }
177 return base[..base.len() - 1].to_string();
179 }
180
181 if (last == 'l' || last == 'r') && !"aeiou".contains(second_last) {
185 if chars.len() >= 3 && "aeiou".contains(chars[chars.len() - 3]) {
186 return format!("{}e", base);
187 }
188 }
189 }
190
191 if needs_e_ed(base) {
192 return format!("{}e", base);
193 }
194
195 let with_e = format!("{}e", base);
199 if is_base_verb(&with_e) {
200 return with_e;
201 }
202
203 base.to_string()
204 }
205
206 fn strip_s(&self, word: &str) -> String {
207 if word.ends_with("ies") {
208 return format!("{}y", &word[..word.len() - 3]);
209 }
210 if word.ends_with("es") {
213 let base_minus_es = &word[..word.len() - 2];
214 let base_minus_s = &word[..word.len() - 1];
215 if base_minus_s.ends_with('e') {
217 return base_minus_s.to_string();
218 }
219 return base_minus_es.to_string();
221 }
222 word[..word.len() - 1].to_string()
223 }
224
225 fn capitalize(s: &str) -> String {
226 let mut chars = s.chars();
227 match chars.next() {
228 None => String::new(),
229 Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
230 }
231 }
232}
233
234impl Default for Lexicon {
235 fn default() -> Self {
236 Self::new()
237 }
238}
239
240#[derive(Debug, Clone, PartialEq, Eq)]
242pub enum WordAnalysis {
243 Noun(NounMetadata),
245 DerivedNoun {
247 lemma: String,
248 number: Number,
249 },
250}
251
252pub fn analyze_word(word: &str) -> Option<WordAnalysis> {
259 let lower = word.to_lowercase();
260
261 if let Some(meta) = lookup_noun_db(&lower) {
264 return Some(WordAnalysis::Noun(meta));
265 }
266
267 if lower.ends_with('s') && lower.len() > 2 {
270 let stem = &lower[..lower.len() - 1];
272 if let Some(meta) = lookup_noun_db(stem) {
273 return Some(WordAnalysis::Noun(NounMetadata {
275 lemma: meta.lemma,
276 number: Number::Plural,
277 features: meta.features,
278 }));
279 }
280
281 if lower.ends_with("es") && lower.len() > 3 {
283 let stem_es = &lower[..lower.len() - 2];
284 if let Some(meta) = lookup_noun_db(stem_es) {
285 return Some(WordAnalysis::Noun(NounMetadata {
286 lemma: meta.lemma,
287 number: Number::Plural,
288 features: meta.features,
289 }));
290 }
291 }
292
293 if lower.ends_with("ies") && lower.len() > 4 {
295 let stem_ies = format!("{}y", &lower[..lower.len() - 3]);
296 if let Some(meta) = lookup_noun_db(&stem_ies) {
297 return Some(WordAnalysis::Noun(NounMetadata {
298 lemma: meta.lemma,
299 number: Number::Plural,
300 features: meta.features,
301 }));
302 }
303 }
304 }
305
306 for rule in get_morphological_rules() {
309 let (is_plural, check_word) = if lower.ends_with('s') && !rule.suffix.ends_with('s') {
311 (true, &lower[..lower.len() - 1])
312 } else {
313 (false, lower.as_str())
314 };
315
316 if check_word.ends_with(rule.suffix) {
317 return Some(WordAnalysis::DerivedNoun {
318 lemma: check_word.to_string(),
319 number: if is_plural { Number::Plural } else { Number::Singular },
320 });
321 }
322 }
323
324 None
325}
326
327pub fn is_derivable_noun(word: &str) -> bool {
330 analyze_word(word).is_some()
331}
332
333pub fn is_proper_name(word: &str) -> bool {
337 let lower = word.to_lowercase();
338 if let Some(meta) = lookup_noun_db(&lower) {
339 return meta.features.contains(&Feature::Proper);
340 }
341 false
342}
343
344pub fn get_canonical_noun(word: &str) -> Option<&'static str> {
355 match analyze_word(word) {
356 Some(WordAnalysis::Noun(meta)) => Some(meta.lemma),
357 Some(WordAnalysis::DerivedNoun { .. }) => {
358 None
362 }
363 _ => None,
364 }
365}