logicaffeine_language/
suggest.rs

1//! Spelling suggestions for unknown words.
2//!
3//! This module provides fuzzy matching to suggest corrections for misspelled
4//! words in the input. It uses Levenshtein distance to find similar words
5//! from a known vocabulary list.
6//!
7//! When the parser encounters an unknown word, this module can suggest
8//! "Did you mean X?" alternatives based on edit distance.
9
10/// Compute the Levenshtein edit distance between two strings.
11pub fn levenshtein(a: &str, b: &str) -> usize {
12    let a_chars: Vec<char> = a.chars().collect();
13    let b_chars: Vec<char> = b.chars().collect();
14    let m = a_chars.len();
15    let n = b_chars.len();
16
17    if m == 0 {
18        return n;
19    }
20    if n == 0 {
21        return m;
22    }
23
24    let mut prev: Vec<usize> = (0..=n).collect();
25    let mut curr = vec![0; n + 1];
26
27    for i in 1..=m {
28        curr[0] = i;
29        for j in 1..=n {
30            let cost = if a_chars[i - 1] == b_chars[j - 1] { 0 } else { 1 };
31            curr[j] = (prev[j] + 1)
32                .min(curr[j - 1] + 1)
33                .min(prev[j - 1] + cost);
34        }
35        std::mem::swap(&mut prev, &mut curr);
36    }
37
38    prev[n]
39}
40
41pub fn find_similar<'a>(word: &str, candidates: &[&'a str], max_distance: usize) -> Option<&'a str> {
42    let word_lower = word.to_lowercase();
43    let mut best: Option<(&str, usize)> = None;
44
45    for &candidate in candidates {
46        let dist = levenshtein(&word_lower, &candidate.to_lowercase());
47        if dist <= max_distance {
48            match best {
49                None => best = Some((candidate, dist)),
50                Some((_, d)) if dist < d => best = Some((candidate, dist)),
51                _ => {}
52            }
53        }
54    }
55
56    best.map(|(s, _)| s)
57}
58
59pub const KNOWN_WORDS: &[&str] = &[
60    "all", "some", "no", "most", "few", "every",
61    "the", "a", "an", "this", "that",
62    "is", "are", "was", "were", "be",
63    "and", "or", "if", "then", "not",
64    "must", "can", "may", "should", "would", "could",
65    "who", "what", "where", "when", "why", "how",
66    "man", "men", "woman", "women", "dog", "cat", "bird",
67    "mortal", "happy", "sad", "tall", "fast", "slow",
68    "loves", "runs", "sees", "knows", "thinks",
69    "logic", "reason", "truth", "false",
70    "John", "Mary", "Socrates", "Aristotle",
71    "to", "by", "with", "from", "for", "in", "on", "at",
72    "himself", "herself", "itself", "themselves",
73    "he", "she", "it", "they", "him", "her", "them",
74];
75
76#[cfg(test)]
77mod tests {
78    use super::*;
79
80    #[test]
81    fn levenshtein_identical() {
82        assert_eq!(levenshtein("hello", "hello"), 0);
83    }
84
85    #[test]
86    fn levenshtein_one_char_diff() {
87        assert_eq!(levenshtein("hello", "hallo"), 1);
88    }
89
90    #[test]
91    fn levenshtein_insertion() {
92        assert_eq!(levenshtein("hello", "helllo"), 1);
93    }
94
95    #[test]
96    fn levenshtein_deletion() {
97        assert_eq!(levenshtein("hello", "helo"), 1);
98    }
99
100    #[test]
101    fn levenshtein_empty() {
102        assert_eq!(levenshtein("", "abc"), 3);
103        assert_eq!(levenshtein("abc", ""), 3);
104    }
105
106    #[test]
107    fn levenshtein_transposition() {
108        assert_eq!(levenshtein("ab", "ba"), 2);
109    }
110
111    #[test]
112    fn find_similar_typo() {
113        let result = find_similar("logoc", KNOWN_WORDS, 2);
114        assert_eq!(result, Some("logic"));
115    }
116
117    #[test]
118    fn find_similar_no_match() {
119        let result = find_similar("xyzzy", KNOWN_WORDS, 2);
120        assert_eq!(result, None);
121    }
122
123    #[test]
124    fn find_similar_case_insensitive() {
125        let result = find_similar("LOGIC", KNOWN_WORDS, 2);
126        assert_eq!(result, Some("logic"));
127    }
128}