logicaffeine_language/parser/
noun.rs

1//! Noun phrase parsing with determiners, adjectives, and possessives.
2//!
3//! This module handles the full complexity of English noun phrases including:
4//!
5//! - **Determiners**: Articles (a, the), quantifiers (every, some, no)
6//! - **Adjectives**: Pre-nominal modifiers, intersective vs subsective
7//! - **Possessives**: "John's", "his", genitive constructions
8//! - **Proper names**: Capitalized constants
9//! - **Numeric literals**: Numbers as noun phrases for comparisons
10//! - **Prepositional phrases**: Post-nominal "of" constructions
11//! - **Superlatives**: "the tallest", "the most interesting"
12//!
13//! The parsed [`NounPhrase`] struct carries definiteness, adjectives, the head
14//! noun, optional possessor, and attached prepositional phrases.
15
16use super::clause::ClauseParsing;
17use super::{ParseResult, Parser};
18use crate::ast::{LogicExpr, NounPhrase, Term};
19use crate::drs::{Case, Gender, Number};
20use logicaffeine_base::SymbolEq;
21use crate::lexicon::Definiteness;
22use crate::token::TokenType;
23use crate::transpile::capitalize_first;
24
25/// Trait for parsing noun phrases.
26///
27/// Provides methods for parsing determiners, adjectives, possessives,
28/// and converting noun phrases to first-order terms.
29pub trait NounParsing<'a, 'ctx, 'int> {
30    /// Parses a full noun phrase with optional greedy PP attachment.
31    fn parse_noun_phrase(&mut self, greedy: bool) -> ParseResult<NounPhrase<'a>>;
32    /// Parses a noun phrase suitable for relative clause antecedent.
33    fn parse_noun_phrase_for_relative(&mut self) -> ParseResult<NounPhrase<'a>>;
34    /// Converts a parsed noun phrase to a first-order term.
35    fn noun_phrase_to_term(&self, np: &NounPhrase<'a>) -> Term<'a>;
36    /// Checks for possessive marker ('s).
37    fn check_possessive(&self) -> bool;
38    /// Checks for "of" preposition (possessive or partitive).
39    fn check_of_preposition(&self) -> bool;
40    /// Checks for proper name or label (capitalized).
41    fn check_proper_name_or_label(&self) -> bool;
42    /// Checks for possessive pronoun (his, her, its, their).
43    fn check_possessive_pronoun(&self) -> bool;
44}
45
46impl<'a, 'ctx, 'int> NounParsing<'a, 'ctx, 'int> for Parser<'a, 'ctx, 'int> {
47    fn parse_noun_phrase(&mut self, greedy: bool) -> ParseResult<NounPhrase<'a>> {
48        let mut definiteness = None;
49        let mut adjectives = Vec::new();
50        let mut non_intersective_prefix: Option<crate::intern::Symbol> = None;
51        let mut possessor_from_pronoun: Option<&'a NounPhrase<'a>> = None;
52        let mut superlative_adj: Option<crate::intern::Symbol> = None;
53
54        // Phase 35: Support numeric literals as noun phrases (e.g., "equal to 42")
55        if let TokenType::Number(sym) = self.peek().kind {
56            self.advance();
57            return Ok(NounPhrase {
58                definiteness: None,
59                adjectives: &[],
60                noun: sym,
61                possessor: None,
62                pps: &[],
63                superlative: None,
64            });
65        }
66
67        if self.check_possessive_pronoun() {
68            let token = self.advance().clone();
69            let (gender, number) = match &token.kind {
70                TokenType::Pronoun { gender, number, case: Case::Possessive } => (*gender, *number),
71                TokenType::Ambiguous { primary, alternatives } => {
72                    let mut found = None;
73                    if let TokenType::Pronoun { gender, number, case: Case::Possessive } = **primary {
74                        found = Some((gender, number));
75                    }
76                    if found.is_none() {
77                        for alt in alternatives {
78                            if let TokenType::Pronoun { gender, number, case: Case::Possessive } = alt {
79                                found = Some((*gender, *number));
80                                break;
81                            }
82                        }
83                    }
84                    found.unwrap_or((Gender::Unknown, Number::Singular))
85                }
86                _ => (Gender::Unknown, Number::Singular),
87            };
88
89            let resolved = self.resolve_pronoun(gender, number)?;
90            let resolved_sym = match resolved {
91                super::ResolvedPronoun::Variable(s) | super::ResolvedPronoun::Constant(s) => s,
92            };
93
94            let possessor_np = NounPhrase {
95                definiteness: None,
96                adjectives: &[],
97                noun: resolved_sym,
98                possessor: None,
99                pps: &[],
100                superlative: None,
101            };
102            possessor_from_pronoun = Some(self.ctx.nps.alloc(possessor_np));
103            definiteness = Some(Definiteness::Definite);
104        } else if let TokenType::Article(def) = self.peek().kind {
105            // Phase 35: Disambiguate "a" as variable vs article
106            // If "a" or "an" is followed by a verb/copula/modal, it's a variable name, not an article
107            let is_variable_a = {
108                let lexeme = self.interner.resolve(self.peek().lexeme).to_lowercase();
109                if lexeme == "a" || lexeme == "an" {
110                    if let Some(next) = self.tokens.get(self.current + 1) {
111                        matches!(next.kind,
112                            TokenType::Is | TokenType::Are | TokenType::Was | TokenType::Were | // Copula
113                            TokenType::Verb { .. } | // Main verb
114                            TokenType::Auxiliary(_) | // will, did
115                            TokenType::Must | TokenType::Can | TokenType::Should | TokenType::May | // Modals
116                            TokenType::Could | TokenType::Would | TokenType::Shall | TokenType::Might |
117                            TokenType::Identity | TokenType::Equals // "a = b"
118                        )
119                    } else {
120                        false
121                    }
122                } else {
123                    false
124                }
125            };
126
127            if !is_variable_a {
128                definiteness = Some(def);
129                self.advance();
130            }
131        }
132
133        if self.check_superlative() {
134            if let TokenType::Superlative(adj) = self.advance().kind {
135                superlative_adj = Some(adj);
136            }
137        }
138
139        if self.check_non_intersective_adjective() {
140            if let TokenType::NonIntersectiveAdjective(adj) = self.advance().kind {
141                non_intersective_prefix = Some(adj);
142            }
143        }
144
145        loop {
146            if self.is_at_end() {
147                break;
148            }
149
150            let is_adjective = matches!(self.peek().kind, TokenType::Adjective(_));
151            if !is_adjective {
152                break;
153            }
154
155            let next_is_content = if self.current + 1 < self.tokens.len() {
156                matches!(
157                    self.tokens[self.current + 1].kind,
158                    TokenType::Noun(_)
159                        | TokenType::Adjective(_)
160                        | TokenType::ProperName(_)
161                )
162            } else {
163                false
164            };
165
166            if next_is_content {
167                if let TokenType::Adjective(adj) = self.advance().kind {
168                    adjectives.push(adj);
169                }
170            } else {
171                break;
172            }
173        }
174
175        let base_noun = self.consume_content_word()?;
176
177        let noun = if let Some(prefix) = non_intersective_prefix {
178            let prefix_str = self.interner.resolve(prefix);
179            let base_str = self.interner.resolve(base_noun);
180            let compound = format!("{}-{}", prefix_str, base_str);
181            self.interner.intern(&compound)
182        } else {
183            base_noun
184        };
185
186        let noun = if self.check_proper_name_or_label() {
187            let label = self.consume_content_word()?;
188            let label_str = self.interner.resolve(label);
189            let base_str = self.interner.resolve(noun);
190            let compound = format!("{}_{}", base_str, label_str);
191            self.interner.intern(&compound)
192        } else {
193            noun
194        };
195
196        if self.check_possessive() {
197            self.advance();
198
199            let possessor = self.ctx.nps.alloc(NounPhrase {
200                definiteness,
201                adjectives: self.ctx.syms.alloc_slice(adjectives.clone()),
202                noun,
203                possessor: None,
204                pps: &[],
205                superlative: superlative_adj,
206            });
207
208            let possessed_noun = self.consume_content_word()?;
209
210            return Ok(NounPhrase {
211                definiteness: None,
212                adjectives: &[],
213                noun: possessed_noun,
214                possessor: Some(possessor),
215                pps: &[],
216                superlative: None,
217            });
218        }
219
220        let should_attach_pps = greedy || self.pp_attach_to_noun;
221
222        let mut pps: Vec<&'a LogicExpr<'a>> = Vec::new();
223        if should_attach_pps {
224            while self.check_preposition() && !self.check_of_preposition() {
225                let prep_token = self.advance().clone();
226                let prep_name = if let TokenType::Preposition(sym) = prep_token.kind {
227                    sym
228                } else {
229                    break;
230                };
231
232                if self.check_content_word() || matches!(self.peek().kind, TokenType::Article(_)) {
233                    let pp_object = self.parse_noun_phrase(true)?;
234                    let placeholder_var = self.interner.intern("_PP_SELF_");
235                    let pp_pred = self.ctx.exprs.alloc(LogicExpr::Predicate {
236                        name: prep_name,
237                        args: self.ctx.terms.alloc_slice([
238                            Term::Variable(placeholder_var),
239                            Term::Constant(pp_object.noun),
240                        ]),
241                        world: None,
242                    });
243                    pps.push(pp_pred);
244                }
245            }
246        }
247        let pps_slice = self.ctx.pps.alloc_slice(pps);
248
249        if self.check_of_preposition() {
250            // Two-Pass Type Disambiguation:
251            // If the noun is a known generic type (e.g., "Stack", "List"),
252            // then "X of Y" is a type instantiation, not a possessive.
253            // For now, we still parse it as possessive structurally, but
254            // the type_registry enables future AST extensions for type annotations.
255            let is_generic = self.is_generic_type(noun);
256
257            if !is_generic {
258                // Standard possessive: "owner of house" → possessor relationship
259                self.advance();
260
261                let possessor_np = self.parse_noun_phrase(true)?;
262                let possessor = self.ctx.nps.alloc(possessor_np);
263
264                return Ok(NounPhrase {
265                    definiteness,
266                    adjectives: self.ctx.syms.alloc_slice(adjectives),
267                    noun,
268                    possessor: Some(possessor),
269                    pps: pps_slice,
270                    superlative: superlative_adj,
271                });
272            }
273            // If generic type, fall through to regular noun phrase handling.
274            // The "of [Type]" will be left unparsed for now.
275            // Future: Parse as GenericType { base: noun, params: [...] }
276        }
277
278        // Register ALL noun phrases as discourse entities, not just definite ones.
279        // This is needed for bridging anaphora: "I bought a car. The engine smoked."
280        // The indefinite "a car" must be in discourse history for "the engine" to link to it.
281        let noun_str = self.interner.resolve(noun);
282        let first_char = noun_str.chars().next().unwrap_or('X');
283        if first_char.is_alphabetic() {
284            // Use full noun name as symbol for consistent output in Full mode
285            let symbol = capitalize_first(noun_str);
286            let number = if noun_str.ends_with('s') && !noun_str.ends_with("ss") {
287                Number::Plural
288            } else {
289                Number::Singular
290            };
291        }
292
293        Ok(NounPhrase {
294            definiteness,
295            adjectives: self.ctx.syms.alloc_slice(adjectives),
296            noun,
297            possessor: possessor_from_pronoun,
298            pps: pps_slice,
299            superlative: superlative_adj,
300        })
301    }
302
303    fn parse_noun_phrase_for_relative(&mut self) -> ParseResult<NounPhrase<'a>> {
304        let mut definiteness = None;
305        let mut adjectives = Vec::new();
306
307        if let TokenType::Article(def) = self.peek().kind {
308            definiteness = Some(def);
309            self.advance();
310        }
311
312        loop {
313            if self.is_at_end() {
314                break;
315            }
316
317            let is_adjective = matches!(self.peek().kind, TokenType::Adjective(_));
318            if !is_adjective {
319                break;
320            }
321
322            let next_is_content = if self.current + 1 < self.tokens.len() {
323                matches!(
324                    self.tokens[self.current + 1].kind,
325                    TokenType::Noun(_)
326                        | TokenType::Adjective(_)
327                        | TokenType::Verb { .. }
328                        | TokenType::ProperName(_)
329                )
330            } else {
331                false
332            };
333
334            if next_is_content {
335                if let TokenType::Adjective(adj) = self.advance().kind.clone() {
336                    adjectives.push(adj);
337                }
338            } else {
339                break;
340            }
341        }
342
343        let noun = self.consume_content_word_for_relative()?;
344
345        if self.check(&TokenType::That) || self.check(&TokenType::Who) {
346            self.advance();
347            let var_name = self.interner.intern(&format!("r{}", self.var_counter));
348            self.var_counter += 1;
349            let _nested_clause = self.parse_relative_clause(var_name)?;
350        }
351
352        Ok(NounPhrase {
353            definiteness,
354            adjectives: self.ctx.syms.alloc_slice(adjectives),
355            noun,
356            possessor: None,
357            pps: &[],
358            superlative: None,
359        })
360    }
361
362    fn noun_phrase_to_term(&self, np: &NounPhrase<'a>) -> Term<'a> {
363        if let Some(possessor) = np.possessor {
364            let possessor_term = self.noun_phrase_to_term(possessor);
365            Term::Possessed {
366                possessor: self.ctx.terms.alloc(possessor_term),
367                possessed: np.noun,
368            }
369        } else {
370            Term::Constant(np.noun)
371        }
372    }
373
374    fn check_possessive(&self) -> bool {
375        matches!(self.peek().kind, TokenType::Possessive)
376    }
377
378    fn check_of_preposition(&self) -> bool {
379        if let TokenType::Preposition(p) = self.peek().kind {
380            p.is(self.interner, "of")
381        } else {
382            false
383        }
384    }
385
386    fn check_proper_name_or_label(&self) -> bool {
387        match &self.peek().kind {
388            TokenType::ProperName(_) => true,
389            TokenType::Noun(s) => {
390                let str_val = self.interner.resolve(*s);
391                str_val.len() == 1 && str_val.chars().next().unwrap().is_uppercase()
392            }
393            _ => false,
394        }
395    }
396
397    fn check_possessive_pronoun(&self) -> bool {
398        match &self.peek().kind {
399            TokenType::Pronoun { case: Case::Possessive, .. } => true,
400            TokenType::Ambiguous { primary, alternatives } => {
401                if self.noun_priority_mode {
402                    if let TokenType::Pronoun { case: Case::Possessive, .. } = **primary {
403                        return true;
404                    }
405                    for alt in alternatives {
406                        if let TokenType::Pronoun { case: Case::Possessive, .. } = alt {
407                            return true;
408                        }
409                    }
410                }
411                false
412            }
413            _ => false,
414        }
415    }
416}