logicaffeine_lexicon/runtime.rs
1//! Runtime lexicon loading for development builds.
2//!
3//! This module provides dynamic JSON-based lexicon loading as an alternative
4//! to compile-time code generation. Enable with the `dynamic-lexicon` feature.
5//!
6//! # Architecture
7//!
8//! The runtime lexicon trades compile-time safety for faster iteration during
9//! development. Instead of generating Rust code from `lexicon.json` at build time,
10//! this module embeds the JSON and parses it once at runtime when `LexiconIndex::new()`
11//! is called.
12//!
13//! # When to Use
14//!
15//! - **Development**: Use `dynamic-lexicon` for faster edit-compile cycles when
16//! modifying the lexicon.
17//! - **Production**: Disable this feature for compile-time validation and
18//! slightly faster startup.
19//!
20//! # JSON Format
21//!
22//! The lexicon file must contain three top-level arrays:
23//!
//! - `nouns`: Array of `NounEntry` objects with a required `lemma` and optional `forms`, `features`, and `sort`
//! - `verbs`: Array of `VerbEntry` objects with required `lemma` and `class`, and optional `forms` and `features`
//! - `adjectives`: Array of `AdjectiveEntry` objects with a required `lemma` and optional `regular` (defaults to `false`) and `features`
27//!
28//! # Example
29//!
30//! ```
31//! use logicaffeine_lexicon::runtime::LexiconIndex;
32//!
33//! let lexicon = LexiconIndex::new();
34//! let proper_nouns = lexicon.proper_nouns();
35//! assert!(!proper_nouns.is_empty());
36//! ```
37//!
38//! # Type Disambiguation
39//!
40//! This module defines its own `VerbEntry`, `NounEntry`, and `AdjectiveEntry` types
41//! for JSON deserialization. These are distinct from `crate::VerbEntry` and other types
42//! in the parent `crate::types` module, which are used for compile-time generated lookups.
43
44use rand::seq::SliceRandom;
45use serde::Deserialize;
46use std::collections::HashMap;
47
/// Raw text of `lexicon.json`, embedded into the binary at compile time by
/// `include_str!` (the path is resolved relative to this source file).
/// [`LexiconIndex::new`] parses this string.
const LEXICON_JSON: &str = include_str!("../../logicaffeine_language/assets/lexicon.json");
49
/// Deserialized lexicon data from lexicon.json.
///
/// Mirrors the three top-level arrays of the JSON document. None of the fields
/// is marked `#[serde(default)]`, so deserialization fails if any of the
/// `nouns`, `verbs`, or `adjectives` keys is missing.
#[derive(Deserialize, Debug)]
pub struct LexiconData {
    /// All noun entries including proper nouns and common nouns.
    pub nouns: Vec<NounEntry>,
    /// All verb entries with Vendler class and features.
    pub verbs: Vec<VerbEntry>,
    /// All adjective entries with gradability info.
    pub adjectives: Vec<AdjectiveEntry>,
}
60
/// A noun entry from the lexicon database.
///
/// Only `lemma` is required in the JSON; the remaining fields fall back to
/// their defaults (empty map, empty list, `None`) when omitted.
#[derive(Deserialize, Debug, Clone)]
pub struct NounEntry {
    /// Base form of the noun (e.g., "dog", "Mary").
    pub lemma: String,
    /// Irregular inflected forms: "plural" → "mice", etc.
    ///
    /// [`pluralize`] looks up the `"plural"` key here before falling back to
    /// the regular pluralization rules.
    #[serde(default)]
    pub forms: HashMap<String, String>,
    /// Grammatical/semantic features: "Animate", "Proper", "Countable".
    ///
    /// The `"Proper"` feature is what [`LexiconIndex::proper_nouns`] and
    /// [`LexiconIndex::common_nouns`] use to separate names from common nouns.
    #[serde(default)]
    pub features: Vec<String>,
    /// Semantic sort for type checking: "Human", "Physical", "Abstract".
    ///
    /// `None` when the entry has no `sort` key.
    #[serde(default)]
    pub sort: Option<String>,
}
76
/// A verb entry from the lexicon database.
///
/// `lemma` and `class` are required in the JSON; `forms` and `features`
/// default to empty when omitted.
#[derive(Deserialize, Debug, Clone)]
pub struct VerbEntry {
    /// Base/infinitive form of the verb (e.g., "run", "give").
    pub lemma: String,
    /// Vendler Aktionsart class: "State", "Activity", "Accomplishment", "Achievement".
    pub class: String,
    /// Irregular inflected forms: "past" → "ran", "participle" → "run".
    ///
    /// The inflection helpers in this module consult the keys `"present3s"`
    /// ([`present_3s`]), `"past"` ([`past_tense`]), and `"gerund"` ([`gerund`])
    /// before applying their regular rules.
    #[serde(default)]
    pub forms: HashMap<String, String>,
    /// Grammatical/semantic features: "Transitive", "Ditransitive", "Control".
    #[serde(default)]
    pub features: Vec<String>,
}
91
/// An adjective entry from the lexicon database.
///
/// Only `lemma` is required in the JSON; `regular` and `features` take their
/// default values when omitted.
#[derive(Deserialize, Debug, Clone)]
pub struct AdjectiveEntry {
    /// Base/positive form of the adjective (e.g., "tall", "happy").
    pub lemma: String,
    /// Whether comparative/superlative follow regular -er/-est pattern.
    ///
    /// Defaults to `false` when the key is absent.
    #[serde(default)]
    pub regular: bool,
    /// Semantic features: "Gradable", "Subsective", "NonIntersective".
    #[serde(default)]
    pub features: Vec<String>,
}
104
/// Index for querying the lexicon by features, sorts, and classes.
///
/// Owns the parsed [`LexiconData`]. Query methods filter the underlying vectors
/// on each call and hand back borrowed entries; no lookup tables are precomputed.
pub struct LexiconIndex {
    /// Parsed lexicon contents that every query scans.
    data: LexiconData,
}
109
110impl LexiconIndex {
111 /// Load and parse the lexicon from the embedded JSON file.
112 pub fn new() -> Self {
113 let data: LexiconData = serde_json::from_str(LEXICON_JSON)
114 .expect("Failed to parse lexicon.json");
115 Self { data }
116 }
117
118 /// Get all nouns marked with the "Proper" feature (names).
119 pub fn proper_nouns(&self) -> Vec<&NounEntry> {
120 self.data.nouns.iter()
121 .filter(|n| n.features.iter().any(|f| f == "Proper"))
122 .collect()
123 }
124
125 /// Get all nouns NOT marked as proper (common nouns).
126 pub fn common_nouns(&self) -> Vec<&NounEntry> {
127 self.data.nouns.iter()
128 .filter(|n| !n.features.iter().any(|f| f == "Proper"))
129 .collect()
130 }
131
132 /// Get all nouns with a specific feature (case-insensitive).
133 pub fn nouns_with_feature(&self, feature: &str) -> Vec<&NounEntry> {
134 self.data.nouns.iter()
135 .filter(|n| n.features.iter().any(|f| f.eq_ignore_ascii_case(feature)))
136 .collect()
137 }
138
139 /// Get all nouns with a specific semantic sort (case-insensitive).
140 pub fn nouns_with_sort(&self, sort: &str) -> Vec<&NounEntry> {
141 self.data.nouns.iter()
142 .filter(|n| n.sort.as_ref().map(|s| s.eq_ignore_ascii_case(sort)).unwrap_or(false))
143 .collect()
144 }
145
146 /// Get all verbs with a specific feature (case-insensitive).
147 pub fn verbs_with_feature(&self, feature: &str) -> Vec<&VerbEntry> {
148 self.data.verbs.iter()
149 .filter(|v| v.features.iter().any(|f| f.eq_ignore_ascii_case(feature)))
150 .collect()
151 }
152
153 /// Get all verbs with a specific Vendler class (case-insensitive).
154 pub fn verbs_with_class(&self, class: &str) -> Vec<&VerbEntry> {
155 self.data.verbs.iter()
156 .filter(|v| v.class.eq_ignore_ascii_case(class))
157 .collect()
158 }
159
160 /// Get all verbs that are intransitive (no Transitive/Ditransitive feature).
161 pub fn intransitive_verbs(&self) -> Vec<&VerbEntry> {
162 self.data.verbs.iter()
163 .filter(|v| {
164 !v.features.iter().any(|f|
165 f.eq_ignore_ascii_case("Transitive") ||
166 f.eq_ignore_ascii_case("Ditransitive")
167 )
168 })
169 .collect()
170 }
171
172 /// Returns all verbs that take a direct object.
173 ///
174 /// Includes both transitive verbs (two-place predicates) and ditransitive verbs
175 /// (three-place predicates). Verbs are matched if they have either the `"Transitive"`
176 /// or `"Ditransitive"` feature (case-insensitive).
177 pub fn transitive_verbs(&self) -> Vec<&VerbEntry> {
178 self.data.verbs.iter()
179 .filter(|v| {
180 v.features.iter().any(|f| f.eq_ignore_ascii_case("Transitive")) ||
181 v.features.iter().any(|f| f.eq_ignore_ascii_case("Ditransitive"))
182 })
183 .collect()
184 }
185
186 /// Returns all adjectives with a specific feature (case-insensitive).
187 ///
188 /// Common features include `"Intersective"`, `"Subsective"`, `"NonIntersective"`,
189 /// and `"Gradable"`. See [`crate::Feature`] for the full list.
190 pub fn adjectives_with_feature(&self, feature: &str) -> Vec<&AdjectiveEntry> {
191 self.data.adjectives.iter()
192 .filter(|a| a.features.iter().any(|f| f.eq_ignore_ascii_case(feature)))
193 .collect()
194 }
195
196 /// Returns all adjectives with intersective semantics.
197 ///
198 /// Intersective adjectives combine with nouns via set intersection:
199 /// "red ball" denotes things that are both red and balls. This is a convenience
200 /// method equivalent to `adjectives_with_feature("Intersective")`.
201 pub fn intersective_adjectives(&self) -> Vec<&AdjectiveEntry> {
202 self.adjectives_with_feature("Intersective")
203 }
204
205 /// Selects a random proper noun from the lexicon.
206 ///
207 /// Returns `None` if the lexicon contains no proper nouns.
208 pub fn random_proper_noun(&self, rng: &mut impl rand::Rng) -> Option<&NounEntry> {
209 self.proper_nouns().choose(rng).copied()
210 }
211
212 /// Selects a random common noun from the lexicon.
213 ///
214 /// Returns `None` if the lexicon contains no common nouns.
215 pub fn random_common_noun(&self, rng: &mut impl rand::Rng) -> Option<&NounEntry> {
216 self.common_nouns().choose(rng).copied()
217 }
218
219 /// Selects a random verb from the lexicon.
220 ///
221 /// Returns `None` if the lexicon contains no verbs.
222 pub fn random_verb(&self, rng: &mut impl rand::Rng) -> Option<&VerbEntry> {
223 self.data.verbs.choose(rng)
224 }
225
226 /// Selects a random intransitive verb from the lexicon.
227 ///
228 /// Returns `None` if the lexicon contains no intransitive verbs.
229 pub fn random_intransitive_verb(&self, rng: &mut impl rand::Rng) -> Option<&VerbEntry> {
230 self.intransitive_verbs().choose(rng).copied()
231 }
232
233 /// Selects a random transitive or ditransitive verb from the lexicon.
234 ///
235 /// Returns `None` if the lexicon contains no transitive verbs.
236 pub fn random_transitive_verb(&self, rng: &mut impl rand::Rng) -> Option<&VerbEntry> {
237 self.transitive_verbs().choose(rng).copied()
238 }
239
240 /// Selects a random adjective from the lexicon.
241 ///
242 /// Returns `None` if the lexicon contains no adjectives.
243 pub fn random_adjective(&self, rng: &mut impl rand::Rng) -> Option<&AdjectiveEntry> {
244 self.data.adjectives.choose(rng)
245 }
246
247 /// Selects a random intersective adjective from the lexicon.
248 ///
249 /// Returns `None` if the lexicon contains no intersective adjectives.
250 pub fn random_intersective_adjective(&self, rng: &mut impl rand::Rng) -> Option<&AdjectiveEntry> {
251 self.intersective_adjectives().choose(rng).copied()
252 }
253}
254
255/// Creates a [`LexiconIndex`] by loading and parsing the embedded lexicon JSON.
256///
257/// Equivalent to calling [`LexiconIndex::new()`].
258impl Default for LexiconIndex {
259 fn default() -> Self {
260 Self::new()
261 }
262}
263
264/// Computes the plural form of a noun.
265///
266/// Returns the irregular plural if one is defined in the noun's `forms` map under
267/// the `"plural"` key. Otherwise, applies English pluralization rules:
268///
269/// - Sibilants (`-s`, `-x`, `-ch`, `-sh`) → append `-es` ("box" → "boxes")
270/// - Consonant + `y` → replace `y` with `-ies` ("city" → "cities")
271/// - Vowel + `y` (`-ay`, `-ey`, `-oy`, `-uy`) → append `-s` ("day" → "days")
272/// - Default → append `-s` ("dog" → "dogs")
273///
274/// # Arguments
275///
276/// * `noun` - The noun entry containing the lemma and optional irregular forms.
277///
278/// # Examples
279///
280/// ```
281/// use logicaffeine_lexicon::runtime::{NounEntry, pluralize};
282/// use std::collections::HashMap;
283///
284/// // Regular noun
285/// let dog = NounEntry {
286/// lemma: "dog".to_string(),
287/// forms: HashMap::new(),
288/// features: vec![],
289/// sort: None,
290/// };
291/// assert_eq!(pluralize(&dog), "dogs");
292///
293/// // Irregular noun
294/// let mouse = NounEntry {
295/// lemma: "mouse".to_string(),
296/// forms: [("plural".to_string(), "mice".to_string())].into(),
297/// features: vec![],
298/// sort: None,
299/// };
300/// assert_eq!(pluralize(&mouse), "mice");
301/// ```
302pub fn pluralize(noun: &NounEntry) -> String {
303 if let Some(plural) = noun.forms.get("plural") {
304 plural.clone()
305 } else {
306 let lemma = noun.lemma.to_lowercase();
307 if lemma.ends_with('s') || lemma.ends_with('x') ||
308 lemma.ends_with("ch") || lemma.ends_with("sh") {
309 format!("{}es", lemma)
310 } else if lemma.ends_with('y') && !lemma.ends_with("ay") &&
311 !lemma.ends_with("ey") && !lemma.ends_with("oy") && !lemma.ends_with("uy") {
312 format!("{}ies", &lemma[..lemma.len()-1])
313 } else {
314 format!("{}s", lemma)
315 }
316 }
317}
318
319/// Computes the third-person singular present tense form of a verb.
320///
321/// Returns the irregular form if one is defined in the verb's `forms` map under
322/// the `"present3s"` key. Otherwise, applies English conjugation rules:
323///
324/// - Sibilants and `-o` (`-s`, `-x`, `-ch`, `-sh`, `-o`) → append `-es` ("go" → "goes")
325/// - Consonant + `y` → replace `y` with `-ies` ("fly" → "flies")
326/// - Vowel + `y` (`-ay`, `-ey`, `-oy`, `-uy`) → append `-s` ("play" → "plays")
327/// - Default → append `-s` ("run" → "runs")
328///
329/// # Arguments
330///
331/// * `verb` - The verb entry containing the lemma and optional irregular forms.
332///
333/// # Examples
334///
335/// ```
336/// use logicaffeine_lexicon::runtime::{VerbEntry, present_3s};
337/// use std::collections::HashMap;
338///
339/// let run = VerbEntry {
340/// lemma: "run".to_string(),
341/// class: "Activity".to_string(),
342/// forms: HashMap::new(),
343/// features: vec![],
344/// };
345/// assert_eq!(present_3s(&run), "runs");
346///
347/// let go = VerbEntry {
348/// lemma: "go".to_string(),
349/// class: "Activity".to_string(),
350/// forms: [("present3s".to_string(), "goes".to_string())].into(),
351/// features: vec![],
352/// };
353/// assert_eq!(present_3s(&go), "goes");
354/// ```
355pub fn present_3s(verb: &VerbEntry) -> String {
356 if let Some(form) = verb.forms.get("present3s") {
357 form.clone()
358 } else {
359 let lemma = verb.lemma.to_lowercase();
360 if lemma.ends_with('s') || lemma.ends_with('x') ||
361 lemma.ends_with("ch") || lemma.ends_with("sh") || lemma.ends_with('o') {
362 format!("{}es", lemma)
363 } else if lemma.ends_with('y') && !lemma.ends_with("ay") &&
364 !lemma.ends_with("ey") && !lemma.ends_with("oy") && !lemma.ends_with("uy") {
365 format!("{}ies", &lemma[..lemma.len()-1])
366 } else {
367 format!("{}s", lemma)
368 }
369 }
370}
371
372/// Computes the past tense form of a verb.
373///
374/// Returns the irregular form if one is defined in the verb's `forms` map under
375/// the `"past"` key. Otherwise, applies English past tense rules:
376///
377/// - Ends in `-e` → append `-d` ("love" → "loved")
378/// - Consonant + `y` → replace `y` with `-ied` ("carry" → "carried")
379/// - Vowel + `y` (`-ay`, `-ey`, `-oy`, `-uy`) → append `-ed` ("play" → "played")
380/// - Default → append `-ed` ("walk" → "walked")
381///
382/// # Arguments
383///
384/// * `verb` - The verb entry containing the lemma and optional irregular forms.
385///
386/// # Examples
387///
388/// ```
389/// use logicaffeine_lexicon::runtime::{VerbEntry, past_tense};
390/// use std::collections::HashMap;
391///
392/// let walk = VerbEntry {
393/// lemma: "walk".to_string(),
394/// class: "Activity".to_string(),
395/// forms: HashMap::new(),
396/// features: vec![],
397/// };
398/// assert_eq!(past_tense(&walk), "walked");
399///
400/// let run = VerbEntry {
401/// lemma: "run".to_string(),
402/// class: "Activity".to_string(),
403/// forms: [("past".to_string(), "ran".to_string())].into(),
404/// features: vec![],
405/// };
406/// assert_eq!(past_tense(&run), "ran");
407/// ```
408pub fn past_tense(verb: &VerbEntry) -> String {
409 if let Some(form) = verb.forms.get("past") {
410 form.clone()
411 } else {
412 let lemma = verb.lemma.to_lowercase();
413 if lemma.ends_with('e') {
414 format!("{}d", lemma)
415 } else if lemma.ends_with('y') && !lemma.ends_with("ay") &&
416 !lemma.ends_with("ey") && !lemma.ends_with("oy") && !lemma.ends_with("uy") {
417 format!("{}ied", &lemma[..lemma.len()-1])
418 } else {
419 format!("{}ed", lemma)
420 }
421 }
422}
423
424/// Computes the gerund (present participle) form of a verb.
425///
426/// Returns the irregular form if one is defined in the verb's `forms` map under
427/// the `"gerund"` key. Otherwise, applies English gerund formation rules:
428///
429/// - Ends in `-e` (but not `-ee`) → drop `e` and append `-ing` ("make" → "making")
430/// - Ends in `-ee` → append `-ing` without dropping ("see" → "seeing")
431/// - Default → append `-ing` ("run" → "running")
432///
433/// Note: This implementation does not handle consonant doubling (e.g., "run" → "running"
434/// should double the 'n', but this produces "runing"). For accurate results with such
435/// verbs, provide an irregular form in the `forms` map.
436///
437/// # Arguments
438///
439/// * `verb` - The verb entry containing the lemma and optional irregular forms.
440///
441/// # Examples
442///
443/// ```
444/// use logicaffeine_lexicon::runtime::{VerbEntry, gerund};
445/// use std::collections::HashMap;
446///
447/// let make = VerbEntry {
448/// lemma: "make".to_string(),
449/// class: "Activity".to_string(),
450/// forms: HashMap::new(),
451/// features: vec![],
452/// };
453/// assert_eq!(gerund(&make), "making");
454///
455/// let see = VerbEntry {
456/// lemma: "see".to_string(),
457/// class: "Activity".to_string(),
458/// forms: HashMap::new(),
459/// features: vec![],
460/// };
461/// assert_eq!(gerund(&see), "seeing");
462/// ```
463pub fn gerund(verb: &VerbEntry) -> String {
464 if let Some(form) = verb.forms.get("gerund") {
465 form.clone()
466 } else {
467 let lemma = verb.lemma.to_lowercase();
468 if lemma.ends_with('e') && !lemma.ends_with("ee") {
469 format!("{}ing", &lemma[..lemma.len()-1])
470 } else {
471 format!("{}ing", lemma)
472 }
473 }
474}
475
// Unit tests. The first three exercise the real embedded lexicon, so they
// depend on specific entries being present in `lexicon.json`; the last two
// build entries by hand and only exercise the inflection helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// The embedded JSON parses and yields at least one entry per queried category.
    #[test]
    fn test_lexicon_loads() {
        let index = LexiconIndex::new();
        assert!(!index.proper_nouns().is_empty());
        assert!(!index.common_nouns().is_empty());
        assert!(!index.intersective_adjectives().is_empty());
    }

    /// Expects the shipped lexicon to list "John" and "Mary" as proper nouns.
    #[test]
    fn test_proper_nouns() {
        let index = LexiconIndex::new();
        let proper = index.proper_nouns();
        assert!(proper.iter().any(|n| n.lemma == "John"));
        assert!(proper.iter().any(|n| n.lemma == "Mary"));
    }

    /// Expects the shipped lexicon to list "Happy" and "Red" as intersective
    /// adjectives. Lemmas are compared exactly, including capitalization.
    #[test]
    fn test_intersective_adjectives() {
        let index = LexiconIndex::new();
        let adj = index.intersective_adjectives();
        assert!(adj.iter().any(|a| a.lemma == "Happy"));
        assert!(adj.iter().any(|a| a.lemma == "Red"));
    }

    /// Regular plurals are lowercased; irregular plurals come from `forms`.
    #[test]
    fn test_pluralize() {
        let noun = NounEntry {
            lemma: "Dog".to_string(),
            forms: HashMap::new(),
            features: vec![],
            sort: None,
        };
        // Capitalized lemma in, lowercase regular plural out.
        assert_eq!(pluralize(&noun), "dogs");

        let noun_irregular = NounEntry {
            lemma: "Man".to_string(),
            forms: [("plural".to_string(), "men".to_string())].into(),
            features: vec![],
            sort: None,
        };
        // The stored "plural" form is returned verbatim.
        assert_eq!(pluralize(&noun_irregular), "men");
    }

    /// A regular verb takes `-s` and the result is lowercased.
    #[test]
    fn test_present_3s() {
        let verb = VerbEntry {
            lemma: "Run".to_string(),
            class: "Activity".to_string(),
            forms: HashMap::new(),
            features: vec![],
        };
        assert_eq!(present_3s(&verb), "runs");
    }
}
533}