logicaffeine_language/analysis/
discovery.rs

1//! Discovery pass for type and policy extraction.
2//!
3//! Runs before main parsing to scan tokens for type and policy definitions.
4//! Populates [`TypeRegistry`] and [`PolicyRegistry`] for use during parsing.
5//!
6//! # Discovery Targets
7//!
8//! | Block | Pattern | Result |
9//! |-------|---------|--------|
10//! | `## Definition` | "A Stack is a generic collection." | `TypeDef::Generic` |
11//! | `## Definition` | "A User is a structure." | `TypeDef::Struct` |
12//! | `## Definition` | "A Shape is an enum." | `TypeDef::Enum` |
13//! | `## Policy` | "A user can publish if they are admin." | `CapabilityDef` |
14//!
15//! # Key Function
16//!
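//! [`DiscoveryPass::run`] - Execute the discovery pass and return the [`TypeRegistry`].
//! [`DiscoveryPass::run_full`] - Same scan, returning both registries in a [`DiscoveryResult`].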
18
19use crate::token::{Token, TokenType, BlockType};
20use logicaffeine_base::{Interner, Symbol};
21use super::registry::{TypeRegistry, TypeDef, FieldDef, FieldType, VariantDef};
22use super::policy::{PolicyRegistry, PredicateDef, CapabilityDef, PolicyCondition};
23use super::dependencies::scan_dependencies;
24
25/// Result of running the discovery pass
26pub struct DiscoveryResult {
27    pub types: TypeRegistry,
28    pub policies: PolicyRegistry,
29}
30
31/// Discovery pass that scans tokens before main parsing to build a TypeRegistry.
32///
33/// This pass looks for type definitions in `## Definition` blocks:
34/// - "A Stack is a generic collection." → Generic type
35/// - "A User is a structure." → Struct type
36/// - "A Shape is an enum." → Enum type
37///
38/// Phase 50: Also scans `## Policy` blocks for security predicates and capabilities.
39pub struct DiscoveryPass<'a> {
40    tokens: &'a [Token],
41    pos: usize,
42    interner: &'a mut Interner,
43}
44
45impl<'a> DiscoveryPass<'a> {
46    pub fn new(tokens: &'a [Token], interner: &'a mut Interner) -> Self {
47        Self { tokens, pos: 0, interner }
48    }
49
50    /// Run discovery pass, returning populated TypeRegistry
51    /// (Backward compatible - returns only TypeRegistry)
52    pub fn run(&mut self) -> TypeRegistry {
53        self.run_full().types
54    }
55
56    /// Phase 50: Run discovery pass, returning both TypeRegistry and PolicyRegistry
57    pub fn run_full(&mut self) -> DiscoveryResult {
58        let mut type_registry = TypeRegistry::with_primitives(self.interner);
59        let mut policy_registry = PolicyRegistry::new();
60
61        while self.pos < self.tokens.len() {
62            // Look for Definition blocks
63            if self.check_block_header(BlockType::Definition) {
64                self.advance(); // consume ## Definition
65                self.scan_definition_block(&mut type_registry);
66            } else if self.check_block_header(BlockType::TypeDef) {
67                // Inline type definition: ## A Point has: or ## A Color is one of:
68                // The article is part of the block header, so don't skip it
69                self.advance(); // consume ## A/An
70                self.parse_type_definition_inline(&mut type_registry);
71            } else if self.check_block_header(BlockType::Policy) {
72                // Phase 50: Security policy definitions
73                self.advance(); // consume ## Policy
74                self.scan_policy_block(&mut policy_registry);
75            } else {
76                self.advance();
77            }
78        }
79
80        DiscoveryResult {
81            types: type_registry,
82            policies: policy_registry,
83        }
84    }
85
86    fn check_block_header(&self, expected: BlockType) -> bool {
87        matches!(
88            self.tokens.get(self.pos),
89            Some(Token { kind: TokenType::BlockHeader { block_type }, .. })
90            if *block_type == expected
91        )
92    }
93
94    fn scan_definition_block(&mut self, registry: &mut TypeRegistry) {
95        // Scan until next block header or EOF
96        while self.pos < self.tokens.len() {
97            if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
98                break;
99            }
100
101            // Look for "A [Name] is a..." pattern
102            if self.check_article() {
103                self.try_parse_type_definition(registry);
104            } else {
105                self.advance();
106            }
107        }
108    }
109
110    /// Phase 50: Scan policy block for predicate and capability definitions
111    /// Patterns:
112    /// - "A User is admin if the user's role equals \"admin\"."
113    /// - "A User can publish the Document if the user is admin OR the user equals the document's owner."
114    fn scan_policy_block(&mut self, registry: &mut PolicyRegistry) {
115        while self.pos < self.tokens.len() {
116            if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
117                break;
118            }
119
120            // Skip newlines and indentation
121            if self.check_newline() || self.check_indent() || self.check_dedent() {
122                self.advance();
123                continue;
124            }
125
126            // Look for "A [Type] is [predicate] if..." or "A [Type] can [action] ..."
127            if self.check_article() {
128                self.try_parse_policy_definition(registry);
129            } else {
130                self.advance();
131            }
132        }
133    }
134
135    /// Phase 50: Parse a policy definition
136    fn try_parse_policy_definition(&mut self, registry: &mut PolicyRegistry) {
137        self.advance(); // consume article
138
139        // Get subject type name (e.g., "User")
140        let subject_type = match self.consume_noun_or_proper() {
141            Some(sym) => sym,
142            None => return,
143        };
144
145        // Determine if predicate ("is admin") or capability ("can publish")
146        if self.check_copula() {
147            // "A User is admin if..."
148            self.advance(); // consume "is"
149
150            // Get predicate name (e.g., "admin")
151            let predicate_name = match self.consume_noun_or_proper() {
152                Some(sym) => sym,
153                None => return,
154            };
155
156            // Expect "if"
157            if !self.check_word("if") {
158                self.skip_to_period();
159                return;
160            }
161            self.advance(); // consume "if"
162
163            // Handle multi-line condition (colon followed by indented lines)
164            if self.check_colon() {
165                self.advance();
166            }
167            if self.check_newline() {
168                self.advance();
169            }
170            if self.check_indent() {
171                self.advance();
172            }
173
174            // Parse condition
175            let condition = self.parse_policy_condition(subject_type, None);
176
177            registry.register_predicate(PredicateDef {
178                subject_type,
179                predicate_name,
180                condition,
181            });
182
183            self.skip_to_period();
184        } else if self.check_word("can") {
185            // "A User can publish the Document if..."
186            self.advance(); // consume "can"
187
188            // Get action name (e.g., "publish")
189            let action = match self.consume_noun_or_proper() {
190                Some(sym) => sym,
191                None => {
192                    // Try verb token
193                    if let Some(Token { kind: TokenType::Verb { lemma, .. }, .. }) = self.peek() {
194                        let sym = *lemma;
195                        self.advance();
196                        sym
197                    } else {
198                        return;
199                    }
200                }
201            };
202
203            // Skip "the" article if present
204            if self.check_article() {
205                self.advance();
206            }
207
208            // Get object type (e.g., "Document")
209            let object_type = match self.consume_noun_or_proper() {
210                Some(sym) => sym,
211                None => return,
212            };
213
214            // Expect "if"
215            if !self.check_word("if") {
216                self.skip_to_period();
217                return;
218            }
219            self.advance(); // consume "if"
220
221            // Parse condition (may include colon for multi-line)
222            if self.check_colon() {
223                self.advance();
224            }
225            if self.check_newline() {
226                self.advance();
227            }
228            if self.check_indent() {
229                self.advance();
230            }
231
232            let condition = self.parse_policy_condition(subject_type, Some(object_type));
233
234            registry.register_capability(CapabilityDef {
235                subject_type,
236                action,
237                object_type,
238                condition,
239            });
240
241            // Skip to end of definition (may span multiple lines)
242            self.skip_policy_definition();
243        } else {
244            self.skip_to_period();
245        }
246    }
247
248    /// Phase 50: Parse a policy condition
249    /// Handles: field comparisons, predicate references, and OR/AND combinators
250    fn parse_policy_condition(&mut self, subject_type: Symbol, object_type: Option<Symbol>) -> PolicyCondition {
251        let first = self.parse_atomic_condition(subject_type, object_type);
252
253        // Check for OR/AND combinators
254        loop {
255            // Skip newlines between conditions
256            while self.check_newline() {
257                self.advance();
258            }
259
260            // Handle ", AND" or ", OR" patterns
261            if self.check_comma() {
262                self.advance(); // consume comma
263                // Skip whitespace after comma
264                while self.check_newline() {
265                    self.advance();
266                }
267            }
268
269            if self.check_word("AND") {
270                self.advance();
271                // Skip newlines after AND
272                while self.check_newline() {
273                    self.advance();
274                }
275                let right = self.parse_atomic_condition(subject_type, object_type);
276                return PolicyCondition::And(Box::new(first), Box::new(right));
277            } else if self.check_word("OR") {
278                self.advance();
279                // Skip newlines after OR
280                while self.check_newline() {
281                    self.advance();
282                }
283                let right = self.parse_atomic_condition(subject_type, object_type);
284                return PolicyCondition::Or(Box::new(first), Box::new(right));
285            } else {
286                break;
287            }
288        }
289
290        first
291    }
292
293    /// Phase 50: Parse an atomic condition
294    fn parse_atomic_condition(&mut self, subject_type: Symbol, object_type: Option<Symbol>) -> PolicyCondition {
295        // Skip "The" article if present
296        if self.check_article() {
297            self.advance();
298        }
299
300        // Get the subject reference (e.g., "user" or "user's role")
301        let subject_ref = match self.consume_noun_or_proper() {
302            Some(sym) => sym,
303            None => return PolicyCondition::FieldEquals {
304                field: self.interner.intern("unknown"),
305                value: self.interner.intern("unknown"),
306                is_string_literal: false,
307            },
308        };
309
310        // Check if it's a field access ("'s role") or a predicate ("is admin")
311        if self.check_possessive() {
312            self.advance(); // consume "'s"
313
314            // Get field name
315            let field = match self.consume_noun_or_proper() {
316                Some(sym) => sym,
317                None => return PolicyCondition::FieldEquals {
318                    field: self.interner.intern("unknown"),
319                    value: self.interner.intern("unknown"),
320                    is_string_literal: false,
321                },
322            };
323
324            // Expect "equals"
325            if self.check_word("equals") {
326                self.advance();
327
328                // Get value (string literal or identifier)
329                let (value, is_string_literal) = self.consume_value();
330
331                return PolicyCondition::FieldEquals { field, value, is_string_literal };
332            }
333        } else if self.check_copula() {
334            // "user is admin"
335            self.advance(); // consume "is"
336
337            // Get predicate name
338            let predicate = match self.consume_noun_or_proper() {
339                Some(sym) => sym,
340                None => return PolicyCondition::FieldEquals {
341                    field: self.interner.intern("unknown"),
342                    value: self.interner.intern("unknown"),
343                    is_string_literal: false,
344                },
345            };
346
347            return PolicyCondition::Predicate {
348                subject: subject_ref,
349                predicate,
350            };
351        } else if self.check_word("equals") {
352            // "user equals the document's owner"
353            self.advance(); // consume "equals"
354
355            // Skip "the" if present
356            if self.check_article() {
357                self.advance();
358            }
359
360            // Check for object field reference: "document's owner"
361            if let Some(obj_ref) = self.consume_noun_or_proper() {
362                if self.check_possessive() {
363                    self.advance(); // consume "'s"
364                    if let Some(field) = self.consume_noun_or_proper() {
365                        return PolicyCondition::ObjectFieldEquals {
366                            subject: subject_ref,
367                            object: obj_ref,
368                            field,
369                        };
370                    }
371                }
372            }
373        }
374
375        // Fallback: unknown condition
376        PolicyCondition::FieldEquals {
377            field: self.interner.intern("unknown"),
378            value: self.interner.intern("unknown"),
379            is_string_literal: false,
380        }
381    }
382
383    /// Consume a value (string literal or identifier), returning the symbol and whether it was a string literal
384    fn consume_value(&mut self) -> (Symbol, bool) {
385        if let Some(Token { kind: TokenType::StringLiteral(sym), .. }) = self.peek() {
386            let s = *sym;
387            self.advance();
388            (s, true)
389        } else if let Some(sym) = self.consume_noun_or_proper() {
390            (sym, false)
391        } else {
392            (self.interner.intern("unknown"), false)
393        }
394    }
395
396    /// Check for possessive marker ('s)
397    fn check_possessive(&self) -> bool {
398        matches!(self.peek(), Some(Token { kind: TokenType::Possessive, .. }))
399    }
400
401    /// Skip to end of a multi-line policy definition
402    fn skip_policy_definition(&mut self) {
403        let mut depth = 0;
404        while self.pos < self.tokens.len() {
405            if self.check_indent() {
406                depth += 1;
407            } else if self.check_dedent() {
408                if depth == 0 {
409                    break;
410                }
411                depth -= 1;
412            }
413            if self.check_period() && depth == 0 {
414                self.advance();
415                break;
416            }
417            if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
418                break;
419            }
420            self.advance();
421        }
422    }
423
424    /// Parse inline type definition where article was part of block header (## A Point has:)
425    fn parse_type_definition_inline(&mut self, registry: &mut TypeRegistry) {
426        // Don't skip article - it was part of the block header
427        self.parse_type_definition_body(registry);
428    }
429
430    fn try_parse_type_definition(&mut self, registry: &mut TypeRegistry) {
431        self.advance(); // skip article
432        self.parse_type_definition_body(registry);
433    }
434
435    fn parse_type_definition_body(&mut self, registry: &mut TypeRegistry) {
436        if let Some(name_sym) = self.consume_noun_or_proper() {
437            // Phase 34: Check for "of [T]" which indicates user-defined generic
438            let type_params = if self.check_preposition("of") {
439                self.advance(); // consume "of"
440                self.parse_type_params()
441            } else {
442                vec![]
443            };
444
445            // Phase 47/49: Check for "is Portable/Shared and" pattern before "has:"
446            let mut is_portable = false;
447            let mut is_shared = false;
448            if self.check_copula() {
449                let copula_pos = self.pos;
450                self.advance(); // consume is/are
451
452                // Check for modifiers in any order (e.g., "is Shared and Portable and")
453                loop {
454                    if self.check_portable() {
455                        self.advance(); // consume "Portable"
456                        is_portable = true;
457                        if self.check_word("and") {
458                            self.advance(); // consume "and"
459                        }
460                    } else if self.check_shared() {
461                        self.advance(); // consume "Shared"
462                        is_shared = true;
463                        if self.check_word("and") {
464                            self.advance(); // consume "and"
465                        }
466                    } else {
467                        break;
468                    }
469                }
470
471                // If no modifiers were found, restore position
472                if !is_portable && !is_shared {
473                    self.pos = copula_pos;
474                }
475            }
476
477            // Phase 31/34: Check for "has:" which indicates struct with fields
478            // Pattern: "A Point has:" or "A Box of [T] has:" or "A Message is Portable and has:"
479            if self.check_word("has") {
480                self.advance(); // consume "has"
481                if self.check_colon() {
482                    self.advance(); // consume ":"
483                    // Skip newline if present
484                    if self.check_newline() {
485                        self.advance();
486                    }
487                    if self.check_indent() {
488                        self.advance(); // consume INDENT
489                        let fields = self.parse_struct_fields_with_params(&type_params);
490                        registry.register(name_sym, TypeDef::Struct { fields, generics: type_params, is_portable, is_shared });
491                        return;
492                    }
493                }
494            }
495
496            // Check for "is either:" or "is one of:" pattern (Phase 33/34: Sum types with variants)
497            if self.check_copula() {
498                self.advance(); // consume is/are
499
500                // Phase 33: Check for "either:" or "one of:" pattern
501                let is_enum_pattern = if self.check_either() {
502                    self.advance(); // consume "either"
503                    true
504                } else if self.check_word("one") {
505                    self.advance(); // consume "one"
506                    if self.check_word("of") {
507                        self.advance(); // consume "of"
508                        true
509                    } else {
510                        false
511                    }
512                } else {
513                    false
514                };
515
516                if is_enum_pattern {
517                    if self.check_colon() {
518                        self.advance(); // consume ":"
519                        // Skip newline if present
520                        if self.check_newline() {
521                            self.advance();
522                        }
523                        if self.check_indent() {
524                            self.advance(); // consume INDENT
525                            let variants = self.parse_enum_variants_with_params(&type_params);
526                            registry.register(name_sym, TypeDef::Enum { variants, generics: type_params, is_portable, is_shared });
527                            return;
528                        }
529                    }
530                }
531
532                if self.check_article() {
533                    self.advance(); // consume a/an
534
535                    // Look for type indicators
536                    if self.check_word("generic") {
537                        registry.register(name_sym, TypeDef::Generic { param_count: 1 });
538                        self.skip_to_period();
539                    } else if self.check_word("record") || self.check_word("struct") || self.check_word("structure") {
540                        registry.register(name_sym, TypeDef::Struct { fields: vec![], generics: vec![], is_portable: false, is_shared: false });
541                        self.skip_to_period();
542                    } else if self.check_word("sum") || self.check_word("enum") || self.check_word("choice") {
543                        registry.register(name_sym, TypeDef::Enum { variants: vec![], generics: vec![], is_portable: false, is_shared: false });
544                        self.skip_to_period();
545                    }
546                }
547            } else if !type_params.is_empty() {
548                // "A Stack of [Things] is..." - old generic syntax, still supported
549                registry.register(name_sym, TypeDef::Generic { param_count: type_params.len() });
550                self.skip_to_period();
551            }
552        }
553    }
554
555    /// Phase 33/34: Parse enum variants in "is either:" block
556    /// Each variant: "A VariantName." or "A VariantName with a field, which is Type."
557    /// or concise: "A VariantName (field: Type)."
558    fn parse_enum_variants_with_params(&mut self, type_params: &[Symbol]) -> Vec<VariantDef> {
559        let mut variants = Vec::new();
560
561        while self.pos < self.tokens.len() {
562            // Exit on dedent or next block
563            if self.check_dedent() {
564                self.advance();
565                break;
566            }
567            if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
568                break;
569            }
570
571            // Skip newlines between variants
572            if self.check_newline() {
573                self.advance();
574                continue;
575            }
576
577            // Parse variant: "A VariantName [with fields | (field: Type)]." or bare "VariantName."
578            // Optionally consume article (a/an) if present
579            if self.check_article() {
580                self.advance(); // consume "A"/"An"
581            }
582
583            // Try to parse variant name (noun or proper name)
584            if let Some(variant_name) = self.consume_noun_or_proper() {
585                // Check for payload fields
586                let fields = if self.check_word("with") {
587                    // Natural syntax: "A Circle with a radius, which is Int."
588                    self.parse_variant_fields_natural_with_params(type_params)
589                } else if self.check_lparen() {
590                    // Concise syntax: "A Circle (radius: Int)."
591                    self.parse_variant_fields_concise_with_params(type_params)
592                } else {
593                    // Unit variant: "A Point." or "Point."
594                    vec![]
595                };
596
597                variants.push(VariantDef {
598                    name: variant_name,
599                    fields,
600                });
601
602                // Consume period
603                if self.check_period() {
604                    self.advance();
605                }
606            } else {
607                self.advance(); // skip malformed token
608            }
609        }
610
611        variants
612    }
613
614    /// Phase 33: Parse enum variants (backward compat wrapper)
615    fn parse_enum_variants(&mut self) -> Vec<VariantDef> {
616        self.parse_enum_variants_with_params(&[])
617    }
618
619    /// Parse variant fields in natural syntax.
620    /// Supports multiple syntaxes:
621    /// - "with a radius, which is Int." (verbose natural)
622    /// - "with radius Int" (concise natural - no article/comma)
623    fn parse_variant_fields_natural_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
624        let mut fields = Vec::new();
625
626        // "with" has already been detected, consume it
627        self.advance();
628
629        loop {
630            // Skip article (optional)
631            if self.check_article() {
632                self.advance();
633            }
634
635            // Get field name
636            if let Some(field_name) = self.consume_noun_or_proper() {
637                // Support multiple type annotation patterns:
638                // 1. ", which is Type" (verbose)
639                // 2. " Type" (concise - just a type name after field name)
640                let ty = if self.check_comma() {
641                    self.advance(); // consume ","
642                    // Consume "which"
643                    if self.check_word("which") {
644                        self.advance();
645                    }
646                    // Consume "is"
647                    if self.check_copula() {
648                        self.advance();
649                    }
650                    self.consume_field_type_with_params(type_params)
651                } else {
652                    // Concise syntax: "radius Int" - type immediately follows field name
653                    self.consume_field_type_with_params(type_params)
654                };
655
656                fields.push(FieldDef {
657                    name: field_name,
658                    ty,
659                    is_public: true, // Variant fields are always public
660                });
661
662                // Check for "and" to continue: "and height Int"
663                // May have comma before "and"
664                if self.check_comma() {
665                    self.advance(); // consume comma before "and"
666                }
667                if self.check_word("and") {
668                    self.advance();
669                    continue;
670                }
671            }
672            break;
673        }
674
675        fields
676    }
677
678    /// Backward compat wrapper
679    fn parse_variant_fields_natural(&mut self) -> Vec<FieldDef> {
680        self.parse_variant_fields_natural_with_params(&[])
681    }
682
683    /// Parse variant fields in concise syntax: "(radius: Int)" or "(width: Int, height: Int)"
684    fn parse_variant_fields_concise_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
685        let mut fields = Vec::new();
686
687        // Consume "("
688        self.advance();
689
690        loop {
691            // Get field name
692            if let Some(field_name) = self.consume_noun_or_proper() {
693                // Expect ": Type" pattern
694                let ty = if self.check_colon() {
695                    self.advance(); // consume ":"
696                    self.consume_field_type_with_params(type_params)
697                } else {
698                    FieldType::Primitive(self.interner.intern("Unknown"))
699                };
700
701                fields.push(FieldDef {
702                    name: field_name,
703                    ty,
704                    is_public: true, // Variant fields are always public
705                });
706
707                // Check for "," to continue
708                if self.check_comma() {
709                    self.advance();
710                    continue;
711                }
712            }
713            break;
714        }
715
716        // Consume ")"
717        if self.check_rparen() {
718            self.advance();
719        }
720
721        fields
722    }
723
724    /// Backward compat wrapper
725    fn parse_variant_fields_concise(&mut self) -> Vec<FieldDef> {
726        self.parse_variant_fields_concise_with_params(&[])
727    }
728
729    /// Parse struct fields in "has:" block
730    /// Each field: "a [public] name, which is Type."
731    fn parse_struct_fields_with_params(&mut self, type_params: &[Symbol]) -> Vec<FieldDef> {
732        let mut fields = Vec::new();
733
734        while self.pos < self.tokens.len() {
735            // Exit on dedent or next block
736            if self.check_dedent() {
737                self.advance();
738                break;
739            }
740            if matches!(self.peek(), Some(Token { kind: TokenType::BlockHeader { .. }, .. })) {
741                break;
742            }
743
744            // Skip newlines between fields
745            if self.check_newline() {
746                self.advance();
747                continue;
748            }
749
750            // Parse field: "a [public] name, which is Type." or "name: Type." (no article)
751            // Check for article (optional for concise syntax)
752            let has_article = self.check_article();
753            if has_article {
754                self.advance(); // consume "a"/"an"
755            }
756
757            // Check for "public" modifier
758            let has_public_keyword = if self.check_word("public") {
759                self.advance();
760                true
761            } else {
762                false
763            };
764            // Visibility determined later based on syntax used
765            let mut is_public = has_public_keyword;
766
767            // Get field name - try to parse if we had article OR if next token looks like identifier
768            if let Some(field_name) = self.consume_noun_or_proper() {
769                // Support both syntaxes:
770                // 1. "name: Type." (concise) - public by default
771                // 2. "name, which is Type." (natural) - public by default
772                let ty = if self.check_colon() {
773                    // Concise syntax: "x: Int" - public by default
774                    is_public = true;
775                    self.advance(); // consume ":"
776                    self.consume_field_type_with_params(type_params)
777                } else if self.check_comma() {
778                    // Natural syntax: "name, which is Type" - also public by default
779                    is_public = true;
780                    self.advance(); // consume ","
781                    // Consume "which"
782                    if self.check_word("which") {
783                        self.advance();
784                    }
785                    // Consume "is"
786                    if self.check_copula() {
787                        self.advance();
788                    }
789                    self.consume_field_type_with_params(type_params)
790                } else if !has_article {
791                    // No colon and no article - this wasn't a field, skip
792                    continue;
793                } else {
794                    // Fallback: unknown type
795                    FieldType::Primitive(self.interner.intern("Unknown"))
796                };
797
798                fields.push(FieldDef {
799                    name: field_name,
800                    ty,
801                    is_public,
802                });
803
804                // Consume period
805                if self.check_period() {
806                    self.advance();
807                }
808            } else if !has_article {
809                // Didn't have article and couldn't get field name - skip this token
810                self.advance();
811            }
812        }
813
814        fields
815    }
816
817    /// Backward compat wrapper
818    fn parse_struct_fields(&mut self) -> Vec<FieldDef> {
819        self.parse_struct_fields_with_params(&[])
820    }
821
822    /// Parse a field type reference
823    fn consume_field_type(&mut self) -> FieldType {
824        // Skip article if present (e.g., "a Tally" -> "Tally")
825        if self.check_article() {
826            self.advance();
827        }
828
829        if let Some(name) = self.consume_noun_or_proper() {
830            let name_str = self.interner.resolve(name);
831
832            // Phase 49c: Check for bias/algorithm modifier on SharedSet: "SharedSet (AddWins) of T"
833            let modified_name = if name_str == "SharedSet" || name_str == "ORSet" {
834                if self.check_lparen() {
835                    self.advance(); // consume "("
836                    let modifier = if self.check_removewins() {
837                        self.advance(); // consume "RemoveWins"
838                        Some("SharedSet_RemoveWins")
839                    } else if self.check_addwins() {
840                        self.advance(); // consume "AddWins"
841                        Some("SharedSet_AddWins")
842                    } else {
843                        None
844                    };
845                    if self.check_rparen() {
846                        self.advance(); // consume ")"
847                    }
848                    modifier.map(|m| self.interner.intern(m))
849                } else {
850                    None
851                }
852            } else if name_str == "SharedSequence" {
853                // Phase 49c: Check for algorithm modifier on SharedSequence: "SharedSequence (YATA) of T"
854                if self.check_lparen() {
855                    self.advance(); // consume "("
856                    let modifier = if self.check_yata() {
857                        self.advance(); // consume "YATA"
858                        Some("SharedSequence_YATA")
859                    } else {
860                        None
861                    };
862                    if self.check_rparen() {
863                        self.advance(); // consume ")"
864                    }
865                    modifier.map(|m| self.interner.intern(m))
866                } else {
867                    None
868                }
869            } else {
870                None
871            };
872
873            // Use modified name if we found a modifier, otherwise use original
874            let final_name = modified_name.unwrap_or(name);
875            let final_name_str = self.interner.resolve(final_name);
876
877            // Phase 49c: Handle "SharedMap from K to V" / "ORMap from K to V" syntax
878            if (final_name_str == "SharedMap" || final_name_str == "ORMap") && self.check_from() {
879                self.advance(); // consume "from"
880                let key_type = self.consume_field_type();
881                // Expect "to" (can be TokenType::To or preposition)
882                if self.check_to() {
883                    self.advance(); // consume "to"
884                }
885                let value_type = self.consume_field_type();
886                return FieldType::Generic { base: final_name, params: vec![key_type, value_type] };
887            }
888
889            // Check for generic: "List of Int", "Seq of Text"
890            if self.check_preposition("of") {
891                self.advance();
892                let param = self.consume_field_type();
893                return FieldType::Generic { base: final_name, params: vec![param] };
894            }
895
896            // Phase 49b: "Divergent T" syntax (no "of" required)
897            if final_name_str == "Divergent" {
898                // Next token should be the inner type
899                let param = self.consume_field_type();
900                return FieldType::Generic { base: final_name, params: vec![param] };
901            }
902
903            // Check if primitive
904            match final_name_str {
905                "Int" | "Nat" | "Text" | "Bool" | "Real" | "Unit" => FieldType::Primitive(final_name),
906                _ => FieldType::Named(final_name),
907            }
908        } else {
909            FieldType::Primitive(self.interner.intern("Unknown"))
910        }
911    }
912
913    // Helper methods
914    fn peek(&self) -> Option<&Token> {
915        self.tokens.get(self.pos)
916    }
917
918    fn advance(&mut self) {
919        if self.pos < self.tokens.len() {
920            self.pos += 1;
921        }
922    }
923
924    fn check_article(&self) -> bool {
925        match self.peek() {
926            Some(Token { kind: TokenType::Article(_), .. }) => true,
927            // Also accept ProperName("A") / ProperName("An") which can occur at line starts
928            Some(Token { kind: TokenType::ProperName(sym), .. }) => {
929                let text = self.interner.resolve(*sym);
930                text.eq_ignore_ascii_case("a") || text.eq_ignore_ascii_case("an")
931            }
932            _ => false,
933        }
934    }
935
936    fn check_copula(&self) -> bool {
937        match self.peek() {
938            Some(Token { kind: TokenType::Is | TokenType::Are, .. }) => true,
939            // Also match "is" when tokenized as a verb (common in declarative mode)
940            Some(Token { kind: TokenType::Verb { lemma, .. }, .. }) => {
941                let word = self.interner.resolve(*lemma).to_lowercase();
942                word == "is" || word == "are"
943            }
944            _ => false,
945        }
946    }
947
948    fn check_preposition(&self, word: &str) -> bool {
949        if let Some(Token { kind: TokenType::Preposition(sym), .. }) = self.peek() {
950            self.interner.resolve(*sym) == word
951        } else {
952            false
953        }
954    }
955
956    fn consume_noun_or_proper(&mut self) -> Option<Symbol> {
957        let t = self.peek()?;
958        match &t.kind {
959            TokenType::Noun(s) | TokenType::ProperName(s) => {
960                let sym = *s;
961                self.advance();
962                Some(sym)
963            }
964            // Phase 31: Also accept Adjective as identifier (for field names like "x")
965            TokenType::Adjective(s) => {
966                let sym = *s;
967                self.advance();
968                Some(sym)
969            }
970            // Phase 47: Accept Performative as type name (for agent messages like "Command")
971            TokenType::Performative(s) => {
972                let sym = *s;
973                self.advance();
974                Some(sym)
975            }
976            // Phase 34: Accept special tokens as identifiers using their lexeme
977            TokenType::Items | TokenType::Some => {
978                let sym = t.lexeme;
979                self.advance();
980                Some(sym)
981            }
982            // Phase 49/50: Accept Verb tokens as identifiers
983            // - Uppercase verbs like "Setting" are type names
984            // - Lowercase verbs like "trusted", "privileged" are predicate names
985            // Use lexeme to preserve the original word (not lemma which strips suffixes)
986            TokenType::Verb { .. } => {
987                let sym = t.lexeme;
988                self.advance();
989                Some(sym)
990            }
991            // Phase 49b: Accept CRDT type tokens as type names
992            TokenType::Tally => {
993                self.advance();
994                Some(self.interner.intern("Tally"))
995            }
996            TokenType::SharedSet => {
997                self.advance();
998                Some(self.interner.intern("SharedSet"))
999            }
1000            TokenType::SharedSequence => {
1001                self.advance();
1002                Some(self.interner.intern("SharedSequence"))
1003            }
1004            TokenType::CollaborativeSequence => {
1005                self.advance();
1006                Some(self.interner.intern("CollaborativeSequence"))
1007            }
1008            TokenType::SharedMap => {
1009                self.advance();
1010                Some(self.interner.intern("SharedMap"))
1011            }
1012            TokenType::Divergent => {
1013                self.advance();
1014                Some(self.interner.intern("Divergent"))
1015            }
1016            // Phase 49: Accept Ambiguous tokens (e.g., "name" could be verb or noun)
1017            // Use lexeme to get the original word
1018            TokenType::Ambiguous { .. } => {
1019                let sym = t.lexeme;
1020                self.advance();
1021                Some(sym)
1022            }
1023            // Phase 103: Accept Focus tokens as identifiers (e.g., "Just" for Maybe variants)
1024            TokenType::Focus(_) => {
1025                let sym = t.lexeme;
1026                self.advance();
1027                Some(sym)
1028            }
1029            // Phase 103: Accept Nothing token as identifier (for Maybe/Option variants)
1030            TokenType::Nothing => {
1031                let sym = t.lexeme;
1032                self.advance();
1033                Some(sym)
1034            }
1035            // Phase 103: Accept Article tokens as type parameter names (L, R, A, etc.)
1036            TokenType::Article(_) => {
1037                let sym = t.lexeme;
1038                self.advance();
1039                Some(sym)
1040            }
1041            // Phase 103: Accept Either token as type name (for Either type definition)
1042            TokenType::Either => {
1043                let sym = t.lexeme;
1044                self.advance();
1045                Some(sym)
1046            }
1047            _ => None
1048        }
1049    }
1050
1051    fn check_word(&self, word: &str) -> bool {
1052        if let Some(token) = self.peek() {
1053            // Check against the lexeme of the token
1054            self.interner.resolve(token.lexeme).eq_ignore_ascii_case(word)
1055        } else {
1056            false
1057        }
1058    }
1059
1060    fn skip_to_period(&mut self) {
1061        while self.pos < self.tokens.len() {
1062            if matches!(self.peek(), Some(Token { kind: TokenType::Period, .. })) {
1063                self.advance();
1064                break;
1065            }
1066            self.advance();
1067        }
1068    }
1069
1070    fn check_colon(&self) -> bool {
1071        matches!(self.peek(), Some(Token { kind: TokenType::Colon, .. }))
1072    }
1073
1074    fn check_newline(&self) -> bool {
1075        matches!(self.peek(), Some(Token { kind: TokenType::Newline, .. }))
1076    }
1077
1078    fn check_indent(&self) -> bool {
1079        matches!(self.peek(), Some(Token { kind: TokenType::Indent, .. }))
1080    }
1081
1082    fn check_dedent(&self) -> bool {
1083        matches!(self.peek(), Some(Token { kind: TokenType::Dedent, .. }))
1084    }
1085
1086    fn check_comma(&self) -> bool {
1087        matches!(self.peek(), Some(Token { kind: TokenType::Comma, .. }))
1088    }
1089
1090    fn check_period(&self) -> bool {
1091        matches!(self.peek(), Some(Token { kind: TokenType::Period, .. }))
1092    }
1093
1094    fn check_either(&self) -> bool {
1095        matches!(self.peek(), Some(Token { kind: TokenType::Either, .. }))
1096    }
1097
1098    fn check_lparen(&self) -> bool {
1099        matches!(self.peek(), Some(Token { kind: TokenType::LParen, .. }))
1100    }
1101
1102    fn check_rparen(&self) -> bool {
1103        matches!(self.peek(), Some(Token { kind: TokenType::RParen, .. }))
1104    }
1105
1106    /// Phase 49c: Check for AddWins token
1107    fn check_addwins(&self) -> bool {
1108        matches!(self.peek(), Some(Token { kind: TokenType::AddWins, .. }))
1109    }
1110
1111    /// Phase 49c: Check for RemoveWins token
1112    fn check_removewins(&self) -> bool {
1113        matches!(self.peek(), Some(Token { kind: TokenType::RemoveWins, .. }))
1114    }
1115
1116    /// Phase 49c: Check for YATA token
1117    fn check_yata(&self) -> bool {
1118        matches!(self.peek(), Some(Token { kind: TokenType::YATA, .. }))
1119    }
1120
1121    /// Phase 49c: Check for "to" (either TokenType::To or preposition "to")
1122    fn check_to(&self) -> bool {
1123        match self.peek() {
1124            Some(Token { kind: TokenType::To, .. }) => true,
1125            Some(Token { kind: TokenType::Preposition(sym), .. }) => {
1126                self.interner.resolve(*sym) == "to"
1127            }
1128            _ => false,
1129        }
1130    }
1131
1132    /// Phase 49c: Check for "from" (either TokenType::From or preposition "from")
1133    fn check_from(&self) -> bool {
1134        match self.peek() {
1135            Some(Token { kind: TokenType::From, .. }) => true,
1136            Some(Token { kind: TokenType::Preposition(sym), .. }) => {
1137                self.interner.resolve(*sym) == "from"
1138            }
1139            _ => false,
1140        }
1141    }
1142
1143    /// Phase 47: Check for Portable token
1144    fn check_portable(&self) -> bool {
1145        matches!(self.peek(), Some(Token { kind: TokenType::Portable, .. }))
1146    }
1147
1148    /// Phase 49: Check for Shared token
1149    fn check_shared(&self) -> bool {
1150        matches!(self.peek(), Some(Token { kind: TokenType::Shared, .. }))
1151    }
1152
1153    // Phase 34: Bracket checks for type parameters
1154    fn check_lbracket(&self) -> bool {
1155        matches!(self.peek(), Some(Token { kind: TokenType::LBracket, .. }))
1156    }
1157
1158    fn check_rbracket(&self) -> bool {
1159        matches!(self.peek(), Some(Token { kind: TokenType::RBracket, .. }))
1160    }
1161
1162    /// Phase 34: Parse type parameters in brackets: "[T]" or "[A] and [B]"
1163    fn parse_type_params(&mut self) -> Vec<Symbol> {
1164        let mut params = Vec::new();
1165
1166        loop {
1167            if self.check_lbracket() {
1168                self.advance(); // consume [
1169                if let Some(param) = self.consume_noun_or_proper() {
1170                    params.push(param);
1171                }
1172                if self.check_rbracket() {
1173                    self.advance(); // consume ]
1174                }
1175            }
1176
1177            // Check for "and" separator for multi-param generics
1178            if self.check_word("and") {
1179                self.advance();
1180                continue;
1181            }
1182            break;
1183        }
1184        params
1185    }
1186
1187    /// Phase 34: Parse a field type reference, recognizing type parameters
1188    fn consume_field_type_with_params(&mut self, type_params: &[Symbol]) -> FieldType {
1189        // Phase 34: Single-letter type params like "A" may be tokenized as Article
1190        // Check for Article that matches a type param first
1191        if let Some(Token { kind: TokenType::Article(_), lexeme, .. }) = self.peek() {
1192            let text = self.interner.resolve(*lexeme);
1193            // Find matching type param by name (case-insensitive for single letters)
1194            for &param_sym in type_params {
1195                let param_name = self.interner.resolve(param_sym);
1196                if text.eq_ignore_ascii_case(param_name) {
1197                    self.advance(); // consume the article token
1198                    return FieldType::TypeParam(param_sym);
1199                }
1200            }
1201            // Article didn't match a type param, skip it (e.g., "a Tally" -> "Tally")
1202            self.advance();
1203        }
1204
1205        if let Some(name) = self.consume_noun_or_proper() {
1206            // Check if this is a type parameter reference
1207            if type_params.contains(&name) {
1208                return FieldType::TypeParam(name);
1209            }
1210
1211            let name_str = self.interner.resolve(name);
1212
1213            // Phase 49c: Check for bias/algorithm modifier on SharedSet: "SharedSet (AddWins) of T"
1214            let modified_name = if name_str == "SharedSet" || name_str == "ORSet" {
1215                if self.check_lparen() {
1216                    self.advance(); // consume "("
1217                    let modifier = if self.check_removewins() {
1218                        self.advance(); // consume "RemoveWins"
1219                        Some("SharedSet_RemoveWins")
1220                    } else if self.check_addwins() {
1221                        self.advance(); // consume "AddWins"
1222                        Some("SharedSet_AddWins")
1223                    } else {
1224                        None
1225                    };
1226                    if self.check_rparen() {
1227                        self.advance(); // consume ")"
1228                    }
1229                    modifier.map(|m| self.interner.intern(m))
1230                } else {
1231                    None
1232                }
1233            } else if name_str == "SharedSequence" {
1234                // Phase 49c: Check for algorithm modifier on SharedSequence: "SharedSequence (YATA) of T"
1235                if self.check_lparen() {
1236                    self.advance(); // consume "("
1237                    let modifier = if self.check_yata() {
1238                        self.advance(); // consume "YATA"
1239                        Some("SharedSequence_YATA")
1240                    } else {
1241                        None
1242                    };
1243                    if self.check_rparen() {
1244                        self.advance(); // consume ")"
1245                    }
1246                    modifier.map(|m| self.interner.intern(m))
1247                } else {
1248                    None
1249                }
1250            } else {
1251                None
1252            };
1253
1254            // Use modified name if we found a modifier, otherwise use original
1255            let final_name = modified_name.unwrap_or(name);
1256            let final_name_str = self.interner.resolve(final_name);
1257
1258            // Phase 49c: Handle "SharedMap from K to V" / "ORMap from K to V" syntax
1259            if (final_name_str == "SharedMap" || final_name_str == "ORMap") && self.check_from() {
1260                self.advance(); // consume "from"
1261                let key_type = self.consume_field_type_with_params(type_params);
1262                // Expect "to" (can be TokenType::To or preposition)
1263                if self.check_to() {
1264                    self.advance(); // consume "to"
1265                }
1266                let value_type = self.consume_field_type_with_params(type_params);
1267                return FieldType::Generic { base: final_name, params: vec![key_type, value_type] };
1268            }
1269
1270            // Check for generic: "List of Int", "Seq of Text", "List of T"
1271            if self.check_preposition("of") {
1272                self.advance();
1273                let param = self.consume_field_type_with_params(type_params);
1274                return FieldType::Generic { base: final_name, params: vec![param] };
1275            }
1276
1277            // Phase 49b: "Divergent T" syntax (no "of" required)
1278            if final_name_str == "Divergent" {
1279                // Next token should be the inner type
1280                let param = self.consume_field_type_with_params(type_params);
1281                return FieldType::Generic { base: final_name, params: vec![param] };
1282            }
1283
1284            // Check if primitive
1285            match final_name_str {
1286                "Int" | "Nat" | "Text" | "Bool" | "Real" | "Unit" => FieldType::Primitive(final_name),
1287                _ => FieldType::Named(final_name),
1288            }
1289        } else {
1290            FieldType::Primitive(self.interner.intern("Unknown"))
1291        }
1292    }
1293}
1294
1295// Note: discover_with_imports is defined in the main crate since it needs
1296// access to the project::Loader which is part of the compile system.
1297
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Lexer;
    use crate::mwe;

    /// Lexes `source` and applies the multi-word-expression pipeline to the
    /// resulting tokens, returning the tokens handed to the discovery pass.
    fn make_tokens(source: &str, interner: &mut Interner) -> Vec<Token> {
        let mut lexer = Lexer::new(source, interner);
        let raw_tokens = lexer.tokenize();
        let trie = mwe::build_mwe_trie();
        mwe::apply_mwe_pipeline(raw_tokens, &trie, interner)
    }

    /// "A Stack is a generic collection." registers `Stack` as a generic type.
    #[test]
    fn discovery_finds_generic_in_definition_block() {
        let mut interner = Interner::new();
        let tokens = make_tokens(
            "## Definition\nA Stack is a generic collection.",
            &mut interner,
        );

        let registry = DiscoveryPass::new(&tokens, &mut interner).run();

        let stack = interner.intern("Stack");
        assert!(registry.is_generic(stack), "Stack should be discovered as generic");
    }

    /// A "has:" block with natural-syntax fields yields a struct with those fields in order.
    #[test]
    fn discovery_parses_struct_with_fields() {
        let source = r#"## Definition
A Point has:
    an x, which is Int.
    a y, which is Int.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        let registry = DiscoveryPass::new(&tokens, &mut interner).run();

        let point = interner.intern("Point");
        assert!(registry.is_type(point), "Point should be registered");

        match registry.get(point) {
            Some(TypeDef::Struct { fields, generics, .. }) => {
                assert_eq!(fields.len(), 2, "Point should have 2 fields, got {:?}", fields);
                assert_eq!(interner.resolve(fields[0].name), "x");
                assert_eq!(interner.resolve(fields[1].name), "y");
                assert!(generics.is_empty(), "Point should have no generics");
            }
            _ => panic!("Point should be a struct with fields"),
        }
    }

    /// Phase 36: LOGOS files have `# Header` before `## Definition`.
    #[test]
    fn discovery_works_with_markdown_header() {
        let source = r#"# Geometry

## Definition
A Point has:
    an x, which is Int.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        // Token dump; only shown by the test harness when the test fails.
        tokens
            .iter()
            .enumerate()
            .for_each(|(i, tok)| eprintln!("Token {}: {:?}", i, tok.kind));

        let registry = DiscoveryPass::new(&tokens, &mut interner).run();

        let point = interner.intern("Point");
        assert!(registry.is_type(point), "Point should be discovered even with # header");
    }

    /// "is Portable and is either:" yields a portable enum with one variant per line.
    #[test]
    fn discovery_parses_portable_enum() {
        let source = r#"## Definition
A Command is Portable and is either:
    a Start.
    a Stop.
    a Pause.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        // Token dump; only shown by the test harness when the test fails.
        eprintln!("Tokens for portable enum:");
        tokens.iter().enumerate().for_each(|(i, tok)| {
            eprintln!("Token {}: {:?} ({})", i, tok.kind, interner.resolve(tok.lexeme))
        });

        let registry = DiscoveryPass::new(&tokens, &mut interner).run();

        let command = interner.intern("Command");
        assert!(registry.is_type(command), "Command should be registered as type");

        match registry.get(command) {
            Some(TypeDef::Enum { variants, is_portable, .. }) => {
                eprintln!("Command is_portable: {}", is_portable);
                eprintln!("Variants: {:?}", variants.iter().map(|v| interner.resolve(v.name)).collect::<Vec<_>>());
                assert!(*is_portable, "Command should be portable");
                assert_eq!(variants.len(), 3, "Command should have 3 variants");
            }
            other => panic!("Command should be an enum, got: {:?}", other),
        }
    }

    /// "is Shared and has:" yields a shared struct; a "LastWriteWins of Int" field is kept.
    #[test]
    fn discovery_parses_lww_int_field() {
        let source = r#"## Definition
A Setting is Shared and has:
    a volume, which is LastWriteWins of Int.
"#;
        let mut interner = Interner::new();
        let tokens = make_tokens(source, &mut interner);

        // Token dump; only shown by the test harness when the test fails.
        eprintln!("Tokens for LWW of Int:");
        tokens.iter().enumerate().for_each(|(i, tok)| {
            eprintln!("{:3}: {:?} ({})", i, tok.kind, interner.resolve(tok.lexeme))
        });

        let registry = DiscoveryPass::new(&tokens, &mut interner).run();

        let setting = interner.intern("Setting");
        assert!(registry.is_type(setting), "Setting should be registered");

        match registry.get(setting) {
            Some(TypeDef::Struct { fields, is_shared, .. }) => {
                eprintln!("is_shared: {}", is_shared);
                eprintln!("Fields: {:?}", fields.len());
                for f in fields {
                    eprintln!("  field: {} = {:?}", interner.resolve(f.name), f.ty);
                }
                assert!(*is_shared, "Setting should be shared");
                assert_eq!(fields.len(), 1, "Setting should have 1 field");
            }
            other => panic!("Setting should be a struct, got: {:?}", other),
        }
    }
}