logicaffeine_lexicon/
runtime.rs

1//! Runtime lexicon loading for development builds.
2//!
3//! This module provides dynamic JSON-based lexicon loading as an alternative
4//! to compile-time code generation. Enable with the `dynamic-lexicon` feature.
5//!
6//! # Architecture
7//!
8//! The runtime lexicon trades compile-time safety for faster iteration during
9//! development. Instead of generating Rust code from `lexicon.json` at build time,
10//! this module embeds the JSON and parses it once at runtime when `LexiconIndex::new()`
11//! is called.
12//!
13//! # When to Use
14//!
15//! - **Development**: Use `dynamic-lexicon` for faster edit-compile cycles when
16//!   modifying the lexicon.
17//! - **Production**: Disable this feature for compile-time validation and
18//!   slightly faster startup.
19//!
20//! # JSON Format
21//!
22//! The lexicon file must contain three top-level arrays:
23//!
24//! - `nouns`: Array of `NounEntry` objects with `lemma`, optional `forms`, `features`, and `sort`
25//! - `verbs`: Array of `VerbEntry` objects with `lemma`, `class`, optional `forms`, and `features`
26//! - `adjectives`: Array of `AdjectiveEntry` objects with `lemma`, `regular`, and `features`
27//!
28//! # Example
29//!
30//! ```
31//! use logicaffeine_lexicon::runtime::LexiconIndex;
32//!
33//! let lexicon = LexiconIndex::new();
34//! let proper_nouns = lexicon.proper_nouns();
35//! assert!(!proper_nouns.is_empty());
36//! ```
37//!
38//! # Type Disambiguation
39//!
40//! This module defines its own `VerbEntry`, `NounEntry`, and `AdjectiveEntry` types
41//! for JSON deserialization. These are distinct from `crate::VerbEntry` and other types
42//! in the parent `crate::types` module, which are used for compile-time generated lookups.
43
44use rand::seq::SliceRandom;
45use serde::Deserialize;
46use std::collections::HashMap;
47
/// Raw text of `lexicon.json`, embedded into the compiled crate by `include_str!`.
///
/// The path is resolved at compile time relative to this source file. The text is
/// parsed into [`LexiconData`] by [`LexiconIndex::new()`].
const LEXICON_JSON: &str = include_str!("../../logicaffeine_language/assets/lexicon.json");
49
/// Deserialized lexicon data from lexicon.json.
///
/// This is the top-level shape of the JSON document. None of the three fields is
/// marked `#[serde(default)]`, so deserialization expects `nouns`, `verbs`, and
/// `adjectives` to all be present.
#[derive(Deserialize, Debug)]
pub struct LexiconData {
    /// All noun entries including proper nouns and common nouns.
    pub nouns: Vec<NounEntry>,
    /// All verb entries with Vendler class and features.
    pub verbs: Vec<VerbEntry>,
    /// All adjective entries with gradability info.
    pub adjectives: Vec<AdjectiveEntry>,
}
60
/// A noun entry from the lexicon database.
///
/// Only `lemma` is required in the JSON; every other field falls back to its
/// default value (empty map, empty list, `None`) when the key is absent.
#[derive(Deserialize, Debug, Clone)]
pub struct NounEntry {
    /// Base form of the noun (e.g., "dog", "Mary").
    pub lemma: String,
    /// Irregular inflected forms: "plural" → "mice", etc.
    ///
    /// Empty when the JSON entry has no `forms` key.
    #[serde(default)]
    pub forms: HashMap<String, String>,
    /// Grammatical/semantic features: "Animate", "Proper", "Countable".
    ///
    /// Empty when the JSON entry has no `features` key.
    #[serde(default)]
    pub features: Vec<String>,
    /// Semantic sort for type checking: "Human", "Physical", "Abstract".
    ///
    /// `None` when the JSON entry has no `sort` key.
    #[serde(default)]
    pub sort: Option<String>,
}
76
/// A verb entry from the lexicon database.
///
/// `lemma` and `class` are required in the JSON; `forms` and `features` default
/// to empty collections when their keys are absent.
#[derive(Deserialize, Debug, Clone)]
pub struct VerbEntry {
    /// Base/infinitive form of the verb (e.g., "run", "give").
    pub lemma: String,
    /// Vendler Aktionsart class: "State", "Activity", "Accomplishment", "Achievement".
    ///
    /// Stored as the raw string from the JSON; it is not validated here.
    pub class: String,
    /// Irregular inflected forms: "past" → "ran", "participle" → "run".
    ///
    /// Empty when the JSON entry has no `forms` key.
    #[serde(default)]
    pub forms: HashMap<String, String>,
    /// Grammatical/semantic features: "Transitive", "Ditransitive", "Control".
    ///
    /// Empty when the JSON entry has no `features` key.
    #[serde(default)]
    pub features: Vec<String>,
}
91
/// An adjective entry from the lexicon database.
///
/// Only `lemma` is required in the JSON; `regular` and `features` fall back to
/// their default values when their keys are absent.
#[derive(Deserialize, Debug, Clone)]
pub struct AdjectiveEntry {
    /// Base/positive form of the adjective (e.g., "tall", "happy").
    pub lemma: String,
    /// Whether comparative/superlative follow regular -er/-est pattern.
    ///
    /// Defaults to `false` when the JSON entry has no `regular` key.
    #[serde(default)]
    pub regular: bool,
    /// Semantic features: "Gradable", "Subsective", "NonIntersective".
    ///
    /// Empty when the JSON entry has no `features` key.
    #[serde(default)]
    pub features: Vec<String>,
}
104
105/// Index for querying the lexicon by features, sorts, and classes.
106pub struct LexiconIndex {
107    data: LexiconData,
108}
109
110impl LexiconIndex {
111    /// Load and parse the lexicon from the embedded JSON file.
112    pub fn new() -> Self {
113        let data: LexiconData = serde_json::from_str(LEXICON_JSON)
114            .expect("Failed to parse lexicon.json");
115        Self { data }
116    }
117
118    /// Get all nouns marked with the "Proper" feature (names).
119    pub fn proper_nouns(&self) -> Vec<&NounEntry> {
120        self.data.nouns.iter()
121            .filter(|n| n.features.iter().any(|f| f == "Proper"))
122            .collect()
123    }
124
125    /// Get all nouns NOT marked as proper (common nouns).
126    pub fn common_nouns(&self) -> Vec<&NounEntry> {
127        self.data.nouns.iter()
128            .filter(|n| !n.features.iter().any(|f| f == "Proper"))
129            .collect()
130    }
131
132    /// Get all nouns with a specific feature (case-insensitive).
133    pub fn nouns_with_feature(&self, feature: &str) -> Vec<&NounEntry> {
134        self.data.nouns.iter()
135            .filter(|n| n.features.iter().any(|f| f.eq_ignore_ascii_case(feature)))
136            .collect()
137    }
138
139    /// Get all nouns with a specific semantic sort (case-insensitive).
140    pub fn nouns_with_sort(&self, sort: &str) -> Vec<&NounEntry> {
141        self.data.nouns.iter()
142            .filter(|n| n.sort.as_ref().map(|s| s.eq_ignore_ascii_case(sort)).unwrap_or(false))
143            .collect()
144    }
145
146    /// Get all verbs with a specific feature (case-insensitive).
147    pub fn verbs_with_feature(&self, feature: &str) -> Vec<&VerbEntry> {
148        self.data.verbs.iter()
149            .filter(|v| v.features.iter().any(|f| f.eq_ignore_ascii_case(feature)))
150            .collect()
151    }
152
153    /// Get all verbs with a specific Vendler class (case-insensitive).
154    pub fn verbs_with_class(&self, class: &str) -> Vec<&VerbEntry> {
155        self.data.verbs.iter()
156            .filter(|v| v.class.eq_ignore_ascii_case(class))
157            .collect()
158    }
159
160    /// Get all verbs that are intransitive (no Transitive/Ditransitive feature).
161    pub fn intransitive_verbs(&self) -> Vec<&VerbEntry> {
162        self.data.verbs.iter()
163            .filter(|v| {
164                !v.features.iter().any(|f|
165                    f.eq_ignore_ascii_case("Transitive") ||
166                    f.eq_ignore_ascii_case("Ditransitive")
167                )
168            })
169            .collect()
170    }
171
172    /// Returns all verbs that take a direct object.
173    ///
174    /// Includes both transitive verbs (two-place predicates) and ditransitive verbs
175    /// (three-place predicates). Verbs are matched if they have either the `"Transitive"`
176    /// or `"Ditransitive"` feature (case-insensitive).
177    pub fn transitive_verbs(&self) -> Vec<&VerbEntry> {
178        self.data.verbs.iter()
179            .filter(|v| {
180                v.features.iter().any(|f| f.eq_ignore_ascii_case("Transitive")) ||
181                v.features.iter().any(|f| f.eq_ignore_ascii_case("Ditransitive"))
182            })
183            .collect()
184    }
185
186    /// Returns all adjectives with a specific feature (case-insensitive).
187    ///
188    /// Common features include `"Intersective"`, `"Subsective"`, `"NonIntersective"`,
189    /// and `"Gradable"`. See [`crate::Feature`] for the full list.
190    pub fn adjectives_with_feature(&self, feature: &str) -> Vec<&AdjectiveEntry> {
191        self.data.adjectives.iter()
192            .filter(|a| a.features.iter().any(|f| f.eq_ignore_ascii_case(feature)))
193            .collect()
194    }
195
196    /// Returns all adjectives with intersective semantics.
197    ///
198    /// Intersective adjectives combine with nouns via set intersection:
199    /// "red ball" denotes things that are both red and balls. This is a convenience
200    /// method equivalent to `adjectives_with_feature("Intersective")`.
201    pub fn intersective_adjectives(&self) -> Vec<&AdjectiveEntry> {
202        self.adjectives_with_feature("Intersective")
203    }
204
205    /// Selects a random proper noun from the lexicon.
206    ///
207    /// Returns `None` if the lexicon contains no proper nouns.
208    pub fn random_proper_noun(&self, rng: &mut impl rand::Rng) -> Option<&NounEntry> {
209        self.proper_nouns().choose(rng).copied()
210    }
211
212    /// Selects a random common noun from the lexicon.
213    ///
214    /// Returns `None` if the lexicon contains no common nouns.
215    pub fn random_common_noun(&self, rng: &mut impl rand::Rng) -> Option<&NounEntry> {
216        self.common_nouns().choose(rng).copied()
217    }
218
219    /// Selects a random verb from the lexicon.
220    ///
221    /// Returns `None` if the lexicon contains no verbs.
222    pub fn random_verb(&self, rng: &mut impl rand::Rng) -> Option<&VerbEntry> {
223        self.data.verbs.choose(rng)
224    }
225
226    /// Selects a random intransitive verb from the lexicon.
227    ///
228    /// Returns `None` if the lexicon contains no intransitive verbs.
229    pub fn random_intransitive_verb(&self, rng: &mut impl rand::Rng) -> Option<&VerbEntry> {
230        self.intransitive_verbs().choose(rng).copied()
231    }
232
233    /// Selects a random transitive or ditransitive verb from the lexicon.
234    ///
235    /// Returns `None` if the lexicon contains no transitive verbs.
236    pub fn random_transitive_verb(&self, rng: &mut impl rand::Rng) -> Option<&VerbEntry> {
237        self.transitive_verbs().choose(rng).copied()
238    }
239
240    /// Selects a random adjective from the lexicon.
241    ///
242    /// Returns `None` if the lexicon contains no adjectives.
243    pub fn random_adjective(&self, rng: &mut impl rand::Rng) -> Option<&AdjectiveEntry> {
244        self.data.adjectives.choose(rng)
245    }
246
247    /// Selects a random intersective adjective from the lexicon.
248    ///
249    /// Returns `None` if the lexicon contains no intersective adjectives.
250    pub fn random_intersective_adjective(&self, rng: &mut impl rand::Rng) -> Option<&AdjectiveEntry> {
251        self.intersective_adjectives().choose(rng).copied()
252    }
253}
254
255/// Creates a [`LexiconIndex`] by loading and parsing the embedded lexicon JSON.
256///
257/// Equivalent to calling [`LexiconIndex::new()`].
258impl Default for LexiconIndex {
259    fn default() -> Self {
260        Self::new()
261    }
262}
263
264/// Computes the plural form of a noun.
265///
266/// Returns the irregular plural if one is defined in the noun's `forms` map under
267/// the `"plural"` key. Otherwise, applies English pluralization rules:
268///
269/// - Sibilants (`-s`, `-x`, `-ch`, `-sh`) → append `-es` ("box" → "boxes")
270/// - Consonant + `y` → replace `y` with `-ies` ("city" → "cities")
271/// - Vowel + `y` (`-ay`, `-ey`, `-oy`, `-uy`) → append `-s` ("day" → "days")
272/// - Default → append `-s` ("dog" → "dogs")
273///
274/// # Arguments
275///
276/// * `noun` - The noun entry containing the lemma and optional irregular forms.
277///
278/// # Examples
279///
280/// ```
281/// use logicaffeine_lexicon::runtime::{NounEntry, pluralize};
282/// use std::collections::HashMap;
283///
284/// // Regular noun
285/// let dog = NounEntry {
286///     lemma: "dog".to_string(),
287///     forms: HashMap::new(),
288///     features: vec![],
289///     sort: None,
290/// };
291/// assert_eq!(pluralize(&dog), "dogs");
292///
293/// // Irregular noun
294/// let mouse = NounEntry {
295///     lemma: "mouse".to_string(),
296///     forms: [("plural".to_string(), "mice".to_string())].into(),
297///     features: vec![],
298///     sort: None,
299/// };
300/// assert_eq!(pluralize(&mouse), "mice");
301/// ```
302pub fn pluralize(noun: &NounEntry) -> String {
303    if let Some(plural) = noun.forms.get("plural") {
304        plural.clone()
305    } else {
306        let lemma = noun.lemma.to_lowercase();
307        if lemma.ends_with('s') || lemma.ends_with('x') ||
308           lemma.ends_with("ch") || lemma.ends_with("sh") {
309            format!("{}es", lemma)
310        } else if lemma.ends_with('y') && !lemma.ends_with("ay") &&
311                  !lemma.ends_with("ey") && !lemma.ends_with("oy") && !lemma.ends_with("uy") {
312            format!("{}ies", &lemma[..lemma.len()-1])
313        } else {
314            format!("{}s", lemma)
315        }
316    }
317}
318
319/// Computes the third-person singular present tense form of a verb.
320///
321/// Returns the irregular form if one is defined in the verb's `forms` map under
322/// the `"present3s"` key. Otherwise, applies English conjugation rules:
323///
324/// - Sibilants and `-o` (`-s`, `-x`, `-ch`, `-sh`, `-o`) → append `-es` ("go" → "goes")
325/// - Consonant + `y` → replace `y` with `-ies` ("fly" → "flies")
326/// - Vowel + `y` (`-ay`, `-ey`, `-oy`, `-uy`) → append `-s` ("play" → "plays")
327/// - Default → append `-s` ("run" → "runs")
328///
329/// # Arguments
330///
331/// * `verb` - The verb entry containing the lemma and optional irregular forms.
332///
333/// # Examples
334///
335/// ```
336/// use logicaffeine_lexicon::runtime::{VerbEntry, present_3s};
337/// use std::collections::HashMap;
338///
339/// let run = VerbEntry {
340///     lemma: "run".to_string(),
341///     class: "Activity".to_string(),
342///     forms: HashMap::new(),
343///     features: vec![],
344/// };
345/// assert_eq!(present_3s(&run), "runs");
346///
347/// let go = VerbEntry {
348///     lemma: "go".to_string(),
349///     class: "Activity".to_string(),
350///     forms: [("present3s".to_string(), "goes".to_string())].into(),
351///     features: vec![],
352/// };
353/// assert_eq!(present_3s(&go), "goes");
354/// ```
355pub fn present_3s(verb: &VerbEntry) -> String {
356    if let Some(form) = verb.forms.get("present3s") {
357        form.clone()
358    } else {
359        let lemma = verb.lemma.to_lowercase();
360        if lemma.ends_with('s') || lemma.ends_with('x') ||
361           lemma.ends_with("ch") || lemma.ends_with("sh") || lemma.ends_with('o') {
362            format!("{}es", lemma)
363        } else if lemma.ends_with('y') && !lemma.ends_with("ay") &&
364                  !lemma.ends_with("ey") && !lemma.ends_with("oy") && !lemma.ends_with("uy") {
365            format!("{}ies", &lemma[..lemma.len()-1])
366        } else {
367            format!("{}s", lemma)
368        }
369    }
370}
371
372/// Computes the past tense form of a verb.
373///
374/// Returns the irregular form if one is defined in the verb's `forms` map under
375/// the `"past"` key. Otherwise, applies English past tense rules:
376///
377/// - Ends in `-e` → append `-d` ("love" → "loved")
378/// - Consonant + `y` → replace `y` with `-ied` ("carry" → "carried")
379/// - Vowel + `y` (`-ay`, `-ey`, `-oy`, `-uy`) → append `-ed` ("play" → "played")
380/// - Default → append `-ed` ("walk" → "walked")
381///
382/// # Arguments
383///
384/// * `verb` - The verb entry containing the lemma and optional irregular forms.
385///
386/// # Examples
387///
388/// ```
389/// use logicaffeine_lexicon::runtime::{VerbEntry, past_tense};
390/// use std::collections::HashMap;
391///
392/// let walk = VerbEntry {
393///     lemma: "walk".to_string(),
394///     class: "Activity".to_string(),
395///     forms: HashMap::new(),
396///     features: vec![],
397/// };
398/// assert_eq!(past_tense(&walk), "walked");
399///
400/// let run = VerbEntry {
401///     lemma: "run".to_string(),
402///     class: "Activity".to_string(),
403///     forms: [("past".to_string(), "ran".to_string())].into(),
404///     features: vec![],
405/// };
406/// assert_eq!(past_tense(&run), "ran");
407/// ```
408pub fn past_tense(verb: &VerbEntry) -> String {
409    if let Some(form) = verb.forms.get("past") {
410        form.clone()
411    } else {
412        let lemma = verb.lemma.to_lowercase();
413        if lemma.ends_with('e') {
414            format!("{}d", lemma)
415        } else if lemma.ends_with('y') && !lemma.ends_with("ay") &&
416                  !lemma.ends_with("ey") && !lemma.ends_with("oy") && !lemma.ends_with("uy") {
417            format!("{}ied", &lemma[..lemma.len()-1])
418        } else {
419            format!("{}ed", lemma)
420        }
421    }
422}
423
424/// Computes the gerund (present participle) form of a verb.
425///
426/// Returns the irregular form if one is defined in the verb's `forms` map under
427/// the `"gerund"` key. Otherwise, applies English gerund formation rules:
428///
429/// - Ends in `-e` (but not `-ee`) → drop `e` and append `-ing` ("make" → "making")
430/// - Ends in `-ee` → append `-ing` without dropping ("see" → "seeing")
431/// - Default → append `-ing` ("run" → "running")
432///
433/// Note: This implementation does not handle consonant doubling (e.g., "run" → "running"
434/// should double the 'n', but this produces "runing"). For accurate results with such
435/// verbs, provide an irregular form in the `forms` map.
436///
437/// # Arguments
438///
439/// * `verb` - The verb entry containing the lemma and optional irregular forms.
440///
441/// # Examples
442///
443/// ```
444/// use logicaffeine_lexicon::runtime::{VerbEntry, gerund};
445/// use std::collections::HashMap;
446///
447/// let make = VerbEntry {
448///     lemma: "make".to_string(),
449///     class: "Activity".to_string(),
450///     forms: HashMap::new(),
451///     features: vec![],
452/// };
453/// assert_eq!(gerund(&make), "making");
454///
455/// let see = VerbEntry {
456///     lemma: "see".to_string(),
457///     class: "Activity".to_string(),
458///     forms: HashMap::new(),
459///     features: vec![],
460/// };
461/// assert_eq!(gerund(&see), "seeing");
462/// ```
463pub fn gerund(verb: &VerbEntry) -> String {
464    if let Some(form) = verb.forms.get("gerund") {
465        form.clone()
466    } else {
467        let lemma = verb.lemma.to_lowercase();
468        if lemma.ends_with('e') && !lemma.ends_with("ee") {
469            format!("{}ing", &lemma[..lemma.len()-1])
470        } else {
471            format!("{}ing", lemma)
472        }
473    }
474}
475
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a noun entry with the given irregular forms and no features or sort.
    fn noun(lemma: &str, forms: HashMap<String, String>) -> NounEntry {
        NounEntry {
            lemma: lemma.to_string(),
            forms,
            features: Vec::new(),
            sort: None,
        }
    }

    /// Builds a regular "Activity" verb entry with no features.
    fn activity_verb(lemma: &str) -> VerbEntry {
        VerbEntry {
            lemma: lemma.to_string(),
            class: "Activity".to_string(),
            forms: HashMap::new(),
            features: Vec::new(),
        }
    }

    // The embedded lexicon must parse and contain each category queried here.
    #[test]
    fn test_lexicon_loads() {
        let lexicon = LexiconIndex::new();
        assert!(!lexicon.proper_nouns().is_empty());
        assert!(!lexicon.common_nouns().is_empty());
        assert!(!lexicon.intersective_adjectives().is_empty());
    }

    // Two well-known names are expected among the proper nouns.
    #[test]
    fn test_proper_nouns() {
        let lexicon = LexiconIndex::new();
        let lemmas: Vec<&str> = lexicon
            .proper_nouns()
            .into_iter()
            .map(|entry| entry.lemma.as_str())
            .collect();
        assert!(lemmas.contains(&"John"));
        assert!(lemmas.contains(&"Mary"));
    }

    // Two well-known adjectives are expected among the intersective ones.
    #[test]
    fn test_intersective_adjectives() {
        let lexicon = LexiconIndex::new();
        let lemmas: Vec<&str> = lexicon
            .intersective_adjectives()
            .into_iter()
            .map(|entry| entry.lemma.as_str())
            .collect();
        assert!(lemmas.contains(&"Happy"));
        assert!(lemmas.contains(&"Red"));
    }

    // Regular nouns get a lowercased "-s" plural; irregular forms are used as given.
    #[test]
    fn test_pluralize() {
        let regular = noun("Dog", HashMap::new());
        assert_eq!(pluralize(&regular), "dogs");

        let irregular = noun(
            "Man",
            HashMap::from([("plural".to_string(), "men".to_string())]),
        );
        assert_eq!(pluralize(&irregular), "men");
    }

    // Regular verbs get a lowercased "-s" third-person singular form.
    #[test]
    fn test_present_3s() {
        assert_eq!(present_3s(&activity_verb("Run")), "runs");
    }
}