logicaffeine_language/token.rs

//! Token types for the LOGOS lexer and parser.
//!
//! This module defines the vocabulary of the LOGOS language at the token level.
//! Tokens represent the atomic syntactic units produced by the lexer and consumed
//! by the parser.
//!
//! ## Token Categories
//!
//! | Category | Examples | Description |
//! |----------|----------|-------------|
//! | **Quantifiers** | every, some, no | Bind variables over domains |
//! | **Determiners** | the, a, this | Select referents |
//! | **Nouns** | cat, philosopher | Predicates over individuals |
//! | **Verbs** | runs, loves | Relations between arguments |
//! | **Adjectives** | red, happy | Modify noun denotations |
//! | **Connectives** | and, or, implies | Combine propositions |
//! | **Pronouns** | he, she, it | Resolve to antecedents |
//!
//! ## Block Types
//!
//! LOGOS uses markdown-style block headers for structured documents
//! (see [`BlockType`] for the full set), including:
//!
//! - `## Theorem`: Declares a proposition to be proved
//! - `## Proof`: Contains the proof steps
//! - `## Definition`: Introduces new terminology
//! - `## Main`: Program entry point
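//!
//! ## Example
//!
//! A minimal sketch of a token as the lexer might produce it for the word
//! "cat" (`cat` below stands in for a hypothetical interned [`Symbol`]):
//!
//! ```ignore
//! let token = Token::new(TokenType::Noun(cat), cat, Span::new(0, 3));
//! assert_eq!(token.span, Span::new(0, 3));
//! ```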

use logicaffeine_base::Symbol;
use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};

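/// Start and end offsets of a token in the source input.
///
/// A minimal usage sketch:
///
/// ```ignore
/// let span = Span::new(5, 10);
/// assert_eq!((span.start, span.end), (5, 10));
/// ```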
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}

/// Presupposition-trigger lexemes (see [`TokenType::PresupTrigger`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PresupKind {
    Stop,
    Start,
    Regret,
    Continue,
    Realize,
    Know,
}

/// Focus particles (see [`TokenType::Focus`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FocusKind {
    Only,
    Even,
    Just,
}

/// Mass-noun measure words (see [`TokenType::Measure`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeasureKind {
    Much,
    Little,
}

/// Document structure block type markers.
///
/// LOGOS uses markdown-style `## Header` syntax to delimit different
/// sections of a program or proof document.
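///
/// # Example
///
/// A header line such as `## Theorem` is expected to lex to a
/// [`TokenType::BlockHeader`] token carrying this variant:
///
/// ```ignore
/// let tok = TokenType::BlockHeader { block_type: BlockType::Theorem };
/// ```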
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockType {
    /// `## Theorem` - Declares a proposition to be proved.
    Theorem,
    /// `## Main` - Program entry point for imperative code.
    Main,
    /// `## Definition` - Introduces new terminology or type definitions.
    Definition,
    /// `## Proof` - Contains proof steps for a theorem.
    Proof,
    /// `## Example` - Illustrative examples.
    Example,
    /// `## Logic` - Direct logical notation input.
    Logic,
    /// `## Note` - Explanatory documentation.
    Note,
    /// `## To` - Function definition block.
    Function,
    /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
    TypeDef,
    /// `## Policy` - Security policy rule definitions.
    Policy,
}

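/// The full vocabulary of token kinds produced by the LOGOS lexer.
///
/// Most variants are bare keywords. Content words and literals carry an
/// interned [`Symbol`] payload, and a few variants (such as [`TokenType::Verb`]
/// and [`TokenType::Pronoun`]) also carry grammatical features from
/// `logicaffeine_lexicon`.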
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    // Document Structure
    BlockHeader { block_type: BlockType },

    // Quantifiers
    All,
    No,
    Some,
    Any,
    Both, // Correlative conjunction marker: "both X and Y"
    Most,
    Few,
    Many,
    Cardinal(u32),
    AtLeast(u32),
    AtMost(u32),

    // Negative Polarity Items (NPIs)
    Anything,
    Anyone,
    Nothing,
    Nobody,
    NoOne,
    Nowhere,
    Ever,
    Never,

    // Logical Connectives
    And,
    Or,
    If,
    Then,
    Not,
    Iff,
    Because,

    // Modal Operators
    Must,
    Shall,
    Should,
    Can,
    May,
    Cannot,
    Would,
    Could,
    Might,
    Had,

    // Imperative Statement Keywords
    Let,
    Set,
    Return,
    Be,
    While,
    Repeat,
    For,
    In,
    From,
    Assert,
    /// Documented assertion with justification string.
    Trust,
    Otherwise,
    Call,
    /// Constructor keyword for struct instantiation.
    New,
    /// Sum type definition keyword.
    Either,
    /// Pattern matching statement keyword.
    Inspect,
    /// Native function modifier for FFI bindings.
    Native,

    // Theorem Keywords
    /// Premise marker in theorem blocks.
    Given,
    /// Goal marker in theorem blocks.
    Prove,
    /// Automatic proof strategy directive.
    Auto,

    // IO Keywords
    /// "Read input from..."
    Read,
    /// "Write x to file..."
    Write,
    /// "...from the console"
    Console,
    /// "...from file..." or "...to file..."
    File,

    // Ownership Keywords (Move/Borrow Semantics)
    /// Move ownership: "Give x to processor"
    Give,
    /// Immutable borrow: "Show x to console"
    Show,

    // Collection Operations
    /// "Push x to items"
    Push,
    /// "Pop from items"
    Pop,
    /// "copy of slice" → slice.to_vec()
    Copy,
    /// "items 1 through 3" → inclusive slice
    Through,
    /// "length of items" → items.len()
    Length,
    /// "items at i" → `items[i]`
    At,

    // Set Operations
    /// "Add x to set" (insert)
    Add,
    /// "Remove x from set"
    Remove,
    /// "set contains x"
    Contains,
    /// "a union b"
    Union,
    /// "a intersection b"
    Intersection,

    // Memory Management (Zones)
    /// "Inside a new zone..."
    Inside,
    /// "...zone called..."
    Zone,
    /// "...called 'Scratch'"
    Called,
    /// "...of size 1 MB"
    Size,
    /// "...mapped from 'file.bin'"
    Mapped,

    // Structured Concurrency
    /// "Attempt all of the following:" → concurrent (async, I/O-bound)
    Attempt,
    /// "the following"
    Following,
    /// "Simultaneously:" → parallel (CPU-bound)
    Simultaneously,

    // Agent System (Actor Model)
    /// "Spawn a Worker called 'w1'" → create agent
    Spawn,
    /// "Send Ping to 'agent'" → send message to agent
    Send,
    /// "Await response from 'agent' into result" → receive message
    Await,

    // Serialization
    /// "A Message is Portable and has:" → serde derives
    Portable,

    // Sipping Protocol
    /// "the manifest of Zone" → FileSipper manifest
    Manifest,
    /// "the chunk at N in Zone" → FileSipper chunk
    Chunk,

    // CRDT Keywords
    /// "A Counter is Shared and has:" → CRDT struct
    Shared,
    /// "Merge remote into local" → CRDT merge
    Merge,
    /// "Increase x's count by 10" → GCounter increment
    Increase,

    // Extended CRDT Keywords
    /// "Decrease x's count by 5" → PNCounter decrement
    Decrease,
    /// "which is a Tally" → PNCounter type
    Tally,
    /// "which is a SharedSet of T" → ORSet type
    SharedSet,
    /// "which is a SharedSequence of T" → RGA type
    SharedSequence,
    /// "which is a CollaborativeSequence of T" → YATA type
    CollaborativeSequence,
    /// "which is a SharedMap from K to V" → ORMap type
    SharedMap,
    /// "which is a Divergent T" → MVRegister type
    Divergent,
    /// "Append x to seq" → RGA append
    Append,
    /// "Resolve x to value" → MVRegister resolve
    Resolve,
    /// "(RemoveWins)" → ORSet bias
    RemoveWins,
    /// "(AddWins)" → ORSet bias (default)
    AddWins,
    /// "(YATA)" → Sequence algorithm
    YATA,
    /// "x's values" → MVRegister values accessor
    Values,

    // Security Keywords
    /// "Check that user is admin" → mandatory runtime guard
    Check,

    // P2P Networking Keywords
    /// "Listen on \[addr\]" → bind to network address
    Listen,
    /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
    NetConnect,
    /// "Sleep N." → pause execution for N milliseconds
    Sleep,

    // GossipSub Keywords
    /// "Sync x on 'topic'" → automatic CRDT replication
    Sync,

    // Persistence Keywords
    /// "Mount x at \[path\]" → load/create persistent CRDT from journal
    Mount,
    /// "Persistent Counter" → type wrapped with journaling
    Persistent,
    /// "x combined with y" → string concatenation
    Combined,

    // Go-like Concurrency Keywords
    /// "Launch a task to..." → spawn green thread
    Launch,
    /// "a task" → identifier for task context
    Task,
    /// "Pipe of Type" → channel creation
    Pipe,
    /// "Receive from pipe" → recv from channel
    Receive,
    /// "Stop handle" → abort task
    Stop,
    /// "Try to send/receive" → non-blocking variant
    Try,
    /// "Send value into pipe" → channel send
    Into,
    /// "Await the first of:" → select statement
    First,
    /// "After N seconds:" → timeout branch
    After,

    // Block Scoping
    Colon,
    Indent,
    Dedent,
    Newline,

    // Content Words
    Noun(Symbol),
    Adjective(Symbol),
    NonIntersectiveAdjective(Symbol),
    Adverb(Symbol),
    ScopalAdverb(Symbol),
    TemporalAdverb(Symbol),
    Verb {
        lemma: Symbol,
        time: Time,
        aspect: Aspect,
        class: VerbClass,
    },
    ProperName(Symbol),

    /// Lexically ambiguous token (e.g., "fish" as noun or verb).
    ///
    /// The parser tries the primary interpretation first, then alternatives
    /// if parsing fails. Used for parse forest generation.
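    ///
    /// ```ignore
    /// // A hypothetical entry for "fish"; the real lexer picks the alternative
    /// // readings from its lexicon (a proper-name reading is used here only
    /// // for illustration).
    /// let tok = TokenType::Ambiguous {
    ///     primary: Box::new(TokenType::Noun(fish)),
    ///     alternatives: vec![TokenType::ProperName(fish)],
    /// };
    /// ```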
    Ambiguous {
        primary: Box<TokenType>,
        alternatives: Vec<TokenType>,
    },

    // Speech Acts (Performatives)
    Performative(Symbol),
    Exclamation,

    // Articles (Definiteness)
    Article(Definiteness),

    // Temporal Auxiliaries
    Auxiliary(Time),

    // Copula & Functional
    Is,
    Are,
    Was,
    Were,
    That,
    Who,
    What,
    Where,
    When,
    Why,
    Does,
    Do,

    // Identity & Reflexive (FOL)
    Identity,
    Equals,
    Reflexive,
    Reciprocal,
    /// Pairwise list coordination: "A and B respectively love C and D"
    Respectively,

    // Pronouns (Discourse)
    Pronoun {
        gender: Gender,
        number: Number,
        case: Case,
    },

    // Prepositions (for N-ary relations)
    Preposition(Symbol),

    // Phrasal Verb Particles (up, down, out, in, off, on, away)
    Particle(Symbol),

    // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
    Comparative(Symbol),
    Superlative(Symbol),
    Than,

    // Control Verbs (Chomsky's Control Theory)
    To,

    // Presupposition Triggers (Austin/Strawson)
    PresupTrigger(PresupKind),

    // Focus Particles (Rooth)
    Focus(FocusKind),

    // Mass Noun Measure
    Measure(MeasureKind),

    // Numeric Literals (prover-ready: stores raw string for symbolic math)
    Number(Symbol),

    /// String literal: `"hello world"`
    StringLiteral(Symbol),

    /// Character literal: `x` (backtick syntax)
    CharLiteral(Symbol),

    // Index Access (1-indexed)
    Item,
    Items,

    // Possession (Genitive Case)
    Possessive,

    // Punctuation
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    Period,

    // Arithmetic Operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent, // Modulo operator

    // Comparison Operators
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    LtEq,
    /// `>=`
    GtEq,
    /// `==`
    EqEq,
    /// `!=`
    NotEq,

    /// Arrow for return type syntax: `->`
    Arrow,

    EOF,
}

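/// A single lexed token: its kind, its interned lexeme, and its source span.
///
/// A minimal construction sketch (`name` stands in for a hypothetical
/// interned [`Symbol`]):
///
/// ```ignore
/// let token = Token::new(TokenType::ProperName(name), name, Span::new(0, 5));
/// assert_eq!(token.kind, TokenType::ProperName(name));
/// ```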
#[derive(Debug, Clone)]
pub struct Token {
    pub kind: TokenType,
    pub lexeme: Symbol,
    pub span: Span,
}

impl Token {
    pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
        Token { kind, lexeme, span }
    }
}

impl TokenType {
    /// Interrogative (wh-) question words.
    pub const WH_WORDS: &'static [TokenType] = &[
        TokenType::Who,
        TokenType::What,
        TokenType::Where,
        TokenType::When,
        TokenType::Why,
    ];

    /// Modal operator tokens.
    pub const MODALS: &'static [TokenType] = &[
        TokenType::Must,
        TokenType::Shall,
        TokenType::Should,
        TokenType::Can,
        TokenType::May,
        TokenType::Cannot,
        TokenType::Would,
        TokenType::Could,
        TokenType::Might,
    ];
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn span_new_stores_positions() {
        let span = Span::new(5, 10);
        assert_eq!(span.start, 5);
        assert_eq!(span.end, 10);
    }

    #[test]
    fn span_default_is_zero() {
        let span = Span::default();
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 0);
    }

    #[test]
    fn token_has_span_field() {
        use logicaffeine_base::Interner;
        let mut interner = Interner::new();
        let lexeme = interner.intern("test");
        let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
        assert_eq!(token.span.start, 0);
        assert_eq!(token.span.end, 4);
    }

    #[test]
    fn wh_words_contains_all_wh_tokens() {
        assert_eq!(TokenType::WH_WORDS.len(), 5);
        assert!(TokenType::WH_WORDS.contains(&TokenType::Who));
        assert!(TokenType::WH_WORDS.contains(&TokenType::What));
        assert!(TokenType::WH_WORDS.contains(&TokenType::Where));
        assert!(TokenType::WH_WORDS.contains(&TokenType::When));
        assert!(TokenType::WH_WORDS.contains(&TokenType::Why));
    }

    #[test]
    fn modals_contains_all_modal_tokens() {
        assert_eq!(TokenType::MODALS.len(), 9);
        assert!(TokenType::MODALS.contains(&TokenType::Must));
        assert!(TokenType::MODALS.contains(&TokenType::Shall));
        assert!(TokenType::MODALS.contains(&TokenType::Should));
        assert!(TokenType::MODALS.contains(&TokenType::Can));
        assert!(TokenType::MODALS.contains(&TokenType::May));
        assert!(TokenType::MODALS.contains(&TokenType::Cannot));
        assert!(TokenType::MODALS.contains(&TokenType::Would));
        assert!(TokenType::MODALS.contains(&TokenType::Could));
        assert!(TokenType::MODALS.contains(&TokenType::Might));
    }
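
    // Shape checks for the structured variants; these construct tokens
    // directly rather than going through the lexer.
    #[test]
    fn block_header_carries_its_block_type() {
        let tok = TokenType::BlockHeader { block_type: BlockType::Theorem };
        assert_eq!(
            tok,
            TokenType::BlockHeader { block_type: BlockType::Theorem }
        );
    }

    #[test]
    fn ambiguous_token_keeps_primary_and_alternatives() {
        use logicaffeine_base::Interner;
        let mut interner = Interner::new();
        let fish = interner.intern("fish");
        let tok = TokenType::Ambiguous {
            primary: Box::new(TokenType::Noun(fish)),
            alternatives: vec![TokenType::ProperName(fish)],
        };
        match tok {
            TokenType::Ambiguous { primary, alternatives } => {
                assert_eq!(*primary, TokenType::Noun(fish));
                assert_eq!(alternatives.len(), 1);
            }
            _ => panic!("expected an Ambiguous token"),
        }
    }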
}