logicaffeine_language/
token.rs

1//! Token types for the LOGOS lexer and parser.
2//!
3//! This module defines the vocabulary of the LOGOS language at the token level.
4//! Tokens represent the atomic syntactic units produced by the lexer and consumed
5//! by the parser.
6//!
7//! ## Token Categories
8//!
9//! | Category | Examples | Description |
10//! |----------|----------|-------------|
11//! | **Quantifiers** | every, some, no | Bind variables over domains |
12//! | **Determiners** | the, a, this | Select referents |
13//! | **Nouns** | cat, philosopher | Predicates over individuals |
14//! | **Verbs** | runs, loves | Relations between arguments |
15//! | **Adjectives** | red, happy | Modify noun denotations |
16//! | **Connectives** | and, or, implies | Combine propositions |
17//! | **Pronouns** | he, she, it | Resolve to antecedents |
18//!
19//! ## Block Types
20//!
21//! LOGOS uses markdown-style block headers for structured documents:
22//!
23//! - `## Theorem`: Declares a proposition to be proved
24//! - `## Proof`: Contains the proof steps
25//! - `## Definition`: Introduces new terminology
26//! - `## Main`: Program entry point
27
28use logicaffeine_base::Symbol;
29use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
32pub struct Span {
33    pub start: usize,
34    pub end: usize,
35}
36
37impl Span {
38    pub fn new(start: usize, end: usize) -> Self {
39        Self { start, end }
40    }
41}
42
43#[derive(Debug, Clone, Copy, PartialEq, Eq)]
44pub enum PresupKind {
45    Stop,
46    Start,
47    Regret,
48    Continue,
49    Realize,
50    Know,
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq)]
54pub enum FocusKind {
55    Only,
56    Even,
57    Just,
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61pub enum MeasureKind {
62    Much,
63    Little,
64}
65
66/// Calendar time units for Span expressions.
67///
68/// These represent variable-length calendar durations, as opposed to
69/// fixed SI time units (ns, ms, s, etc.) used in Duration.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub enum CalendarUnit {
72    Day,
73    Week,
74    Month,
75    Year,
76}
77
78/// Document structure block type markers.
79///
80/// LOGOS uses markdown-style `## Header` syntax to delimit different
81/// sections of a program or proof document.
82#[derive(Debug, Clone, Copy, PartialEq, Eq)]
83pub enum BlockType {
84    /// `## Theorem` - Declares a proposition to be proved.
85    Theorem,
86    /// `## Main` - Program entry point for imperative code.
87    Main,
88    /// `## Definition` - Introduces new terminology or type definitions.
89    Definition,
90    /// `## Proof` - Contains proof steps for a theorem.
91    Proof,
92    /// `## Example` - Illustrative examples.
93    Example,
94    /// `## Logic` - Direct logical notation input.
95    Logic,
96    /// `## Note` - Explanatory documentation.
97    Note,
98    /// `## To` - Function definition block.
99    Function,
100    /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
101    TypeDef,
102    /// `## Policy` - Security policy rule definitions.
103    Policy,
104    /// `## Requires` - External crate dependency declarations.
105    Requires,
106    /// `## Hardware` - Signal declarations for hardware verification.
107    Hardware,
108    /// `## Property` - Temporal assertions for hardware verification.
109    Property,
110    /// `## No` - Optimization annotation (followed by Memo, TCO, Peephole, Borrow, or Optimize).
111    No,
112}
113
114#[derive(Debug, Clone, PartialEq, Eq)]
115pub enum TokenType {
116    // Document Structure
117    BlockHeader { block_type: BlockType },
118
119    // Quantifiers
120    All,
121    No,
122    Some,
123    Any,
124    Both, // Correlative conjunction marker: "both X and Y"
125    Most,
126    Few,
127    Many,
128    Cardinal(u32),
129    AtLeast(u32),
130    AtMost(u32),
131
132    // Negative Polarity Items (NPIs)
133    Anything,
134    Anyone,
135    Nothing,
136    Nobody,
137    NoOne,
138    Nowhere,
139    Ever,
140    Never,
141
142    // Logical Connectives
143    And,
144    Or,
145    If,
146    Then,
147    Not,
148    Iff,
149    Because,
150    /// Temporal binary connective: "P until Q"
151    Until,
152    /// Temporal binary connective: "P release Q" (dual of Until)
153    Release,
154    /// Temporal binary connective: "P weak-until Q" (Until or Always)
155    WeakUntil,
156    /// Compiler-generated implication (e.g., quantifier restrictions from Kripke lowering).
157    /// Distinguished from `If` which represents user-written conditionals.
158    /// This separation gives downstream passes (KG extraction, SVA synthesis)
159    /// irrefutable provenance: `If` = user intent, `Implies` = compiler glue.
160    Implies,
161
162    // Modal Operators
163    Must,
164    Shall,
165    Should,
166    Can,
167    May,
168    Cannot,
169    Would,
170    Could,
171    Might,
172    Had,
173
174    // Imperative Statement Keywords
175    Let,
176    Set,
177    Return,
178    /// Exits the innermost while loop: `Break.`
179    Break,
180    Be,
181    While,
182    Repeat,
183    For,
184    In,
185    From,
186    Assert,
187    /// Documented assertion with justification string.
188    Trust,
189    Otherwise,
190    /// Alias for `Otherwise` - Pythonic else clause
191    Else,
192    /// Python-style else-if shorthand
193    Elif,
194    Call,
195    /// Constructor keyword for struct instantiation.
196    New,
197    /// Sum type definition keyword.
198    Either,
199    /// Pattern matching statement keyword.
200    Inspect,
201    /// Native function modifier for FFI bindings.
202    Native,
203    /// Escape hatch header keyword: "Escape to Rust:"
204    Escape,
205    /// Raw code block captured verbatim from an escape hatch body.
206    /// The Symbol holds the interned raw foreign code (indentation-stripped).
207    EscapeBlock(Symbol),
208
209    // Theorem Keywords
210    /// Premise marker in theorem blocks.
211    Given,
212    /// Goal marker in theorem blocks.
213    Prove,
214    /// Automatic proof strategy directive.
215    Auto,
216
217    // IO Keywords
218    /// "Read input from..."
219    Read,
220    /// "Write x to file..."
221    Write,
222    /// "...from the console"
223    Console,
224    /// "...from file..." or "...to file..."
225    File,
226
227    // Ownership Keywords (Move/Borrow Semantics)
228    /// Move ownership: "Give x to processor"
229    Give,
230    /// Immutable borrow: "Show x to console"
231    Show,
232
233    // Collection Operations
234    /// "Push x to items"
235    Push,
236    /// "Pop from items"
237    Pop,
238    /// "copy of slice" → slice.to_vec()
239    Copy,
240    /// "items 1 through 3" → inclusive slice
241    Through,
242    /// "length of items" → items.len()
243    Length,
244    /// "items at i" → `items[i]`
245    At,
246
247    // Set Operations
248    /// "Add x to set" (insert)
249    Add,
250    /// "Remove x from set"
251    Remove,
252    /// "set contains x"
253    Contains,
254    /// "a union b"
255    Union,
256    /// "a intersection b"
257    Intersection,
258
259    // Memory Management (Zones)
260    /// "Inside a new zone..."
261    Inside,
262    /// "...zone called..."
263    Zone,
264    /// "...called 'Scratch'"
265    Called,
266    /// "...of size 1 MB"
267    Size,
268    /// "...mapped from 'file.bin'"
269    Mapped,
270
271    // Structured Concurrency
272    /// "Attempt all of the following:" → concurrent (async, I/O-bound)
273    Attempt,
274    /// "the following"
275    Following,
276    /// "Simultaneously:" → parallel (CPU-bound)
277    Simultaneously,
278
279    // Agent System (Actor Model)
280    /// "Spawn a Worker called 'w1'" → create agent
281    Spawn,
282    /// "Send Ping to 'agent'" → send message to agent
283    Send,
284    /// "Await response from 'agent' into result" → receive message
285    Await,
286
287    // Serialization
288    /// "A Message is Portable and has:" → serde derives
289    Portable,
290
291    // Sipping Protocol
292    /// "the manifest of Zone" → FileSipper manifest
293    Manifest,
294    /// "the chunk at N in Zone" → FileSipper chunk
295    Chunk,
296
297    // CRDT Keywords
298    /// "A Counter is Shared and has:" → CRDT struct
299    Shared,
300    /// "Merge remote into local" → CRDT merge
301    Merge,
302    /// "Increase x's count by 10" → GCounter increment
303    Increase,
304
305    // Extended CRDT Keywords
306    /// "Decrease x's count by 5" → PNCounter decrement
307    Decrease,
308    /// "which is a Tally" → PNCounter type
309    Tally,
310    /// "which is a SharedSet of T" → ORSet type
311    SharedSet,
312    /// "which is a SharedSequence of T" → RGA type
313    SharedSequence,
314    /// "which is a CollaborativeSequence of T" → YATA type
315    CollaborativeSequence,
316    /// "which is a SharedMap from K to V" → ORMap type
317    SharedMap,
318    /// "which is a Divergent T" → MVRegister type
319    Divergent,
320    /// "Append x to seq" → RGA append
321    Append,
322    /// "Resolve x to value" → MVRegister resolve
323    Resolve,
324    /// "(RemoveWins)" → ORSet bias
325    RemoveWins,
326    /// "(AddWins)" → ORSet bias (default)
327    AddWins,
328    /// "(YATA)" → Sequence algorithm
329    YATA,
330    /// "x's values" → MVRegister values accessor
331    Values,
332
333    // Security Keywords
334    /// "Check that user is admin" → mandatory runtime guard
335    Check,
336
337    // P2P Networking Keywords
338    /// "Listen on \[addr\]" → bind to network address
339    Listen,
340    /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
341    NetConnect,
342    /// "Sleep N." → pause execution for N milliseconds
343    Sleep,
344
345    // GossipSub Keywords
346    /// "Sync x on 'topic'" → automatic CRDT replication
347    Sync,
348
349    // Persistence Keywords
350    /// "Mount x at \[path\]" → load/create persistent CRDT from journal
351    Mount,
352    /// "Persistent Counter" → type wrapped with journaling
353    Persistent,
354    /// "x combined with y" → string concatenation
355    Combined,
356
357    // Go-like Concurrency Keywords
358    /// "Launch a task to..." → spawn green thread
359    Launch,
360    /// "a task" → identifier for task context
361    Task,
362    /// "Pipe of Type" → channel creation
363    Pipe,
364    /// "Receive from pipe" → recv from channel
365    Receive,
366    /// "Stop handle" → abort task
367    Stop,
368    /// "Try to send/receive" → non-blocking variant
369    Try,
370    /// "Send value into pipe" → channel send
371    Into,
372    /// "Await the first of:" → select statement
373    First,
374    /// "After N seconds:" → timeout branch
375    After,
376
377    // Block Scoping
378    Colon,
379    Indent,
380    Dedent,
381    Newline,
382
383    // Content Words
384    Noun(Symbol),
385    Adjective(Symbol),
386    NonIntersectiveAdjective(Symbol),
387    Adverb(Symbol),
388    ScopalAdverb(Symbol),
389    TemporalAdverb(Symbol),
390    Verb {
391        lemma: Symbol,
392        time: Time,
393        aspect: Aspect,
394        class: VerbClass,
395    },
396    ProperName(Symbol),
397
398    /// Lexically ambiguous token (e.g., "fish" as noun or verb).
399    ///
400    /// The parser tries the primary interpretation first, then alternatives
401    /// if parsing fails. Used for parse forest generation.
402    Ambiguous {
403        primary: Box<TokenType>,
404        alternatives: Vec<TokenType>,
405    },
406
407    // Speech Acts (Performatives)
408    Performative(Symbol),
409    Exclamation,
410
411    // Articles (Definiteness)
412    Article(Definiteness),
413
414    // Temporal Auxiliaries
415    Auxiliary(Time),
416
417    // Copula & Functional
418    Is,
419    Are,
420    Was,
421    Were,
422    That,
423    Who,
424    What,
425    Where,
426    When,
427    Why,
428    Does,
429    Do,
430
431    // Identity & Reflexive (FOL)
432    Identity,
433    Equals,
434    Reflexive,
435    Reciprocal,
436    /// Pairwise list coordination: "A and B respectively love C and D"
437    Respectively,
438
439    // Pronouns (Discourse)
440    Pronoun {
441        gender: Gender,
442        number: Number,
443        case: Case,
444    },
445
446    // Prepositions (for N-ary relations)
447    Preposition(Symbol),
448
449    // Phrasal Verb Particles (up, down, out, in, off, on, away)
450    Particle(Symbol),
451
452    // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
453    Comparative(Symbol),
454    Superlative(Symbol),
455    Than,
456
457    // Control Verbs (Chomsky's Control Theory)
458    To,
459
460    // Presupposition Triggers (Austin/Strawson)
461    PresupTrigger(PresupKind),
462
463    // Focus Particles (Rooth)
464    Focus(FocusKind),
465
466    // Mass Noun Measure
467    Measure(MeasureKind),
468
469    // Numeric Literals (prover-ready: stores raw string for symbolic math)
470    Number(Symbol),
471
472    /// Duration literal with SI suffix: 500ms, 2s, 50ns
473    /// Stores the value normalized to nanoseconds and preserves the original unit.
474    DurationLiteral {
475        nanos: i64,
476        original_unit: Symbol,
477    },
478
479    /// Date literal in ISO-8601 format: 2026-05-20
480    /// Stores days since Unix epoch (1970-01-01).
481    DateLiteral {
482        days: i32,
483    },
484
485    /// Time-of-day literal: 4pm, 9:30am, noon, midnight
486    /// Stores nanoseconds from midnight (00:00:00).
487    TimeLiteral {
488        nanos_from_midnight: i64,
489    },
490
491    /// Calendar time unit word: day, week, month, year (or plurals)
492    /// Used in Span expressions like "3 days" or "2 months and 5 days"
493    CalendarUnit(CalendarUnit),
494
495    /// Postfix operator for relative past time: "3 days ago"
496    Ago,
497
498    /// Postfix operator for relative future time: "3 days hence"
499    Hence,
500
501    /// Binary operator for span subtraction from a date: "3 days before 2026-05-20"
502    Before,
503
504    /// String literal: `"hello world"`
505    StringLiteral(Symbol),
506
507    /// Interpolated string literal: `"Hello, {name}!"`
508    /// Contains raw content with {} holes preserved
509    InterpolatedString(Symbol),
510
511    // Character literal: `x` (backtick syntax)
512    CharLiteral(Symbol),
513
514    // Index Access (1-indexed)
515    Item,
516    Items,
517
518    // Possession (Genitive Case)
519    Possessive,
520
521    // Punctuation
522    LParen,
523    RParen,
524    LBracket,
525    RBracket,
526    Comma,
527    Period,
528
529    // Bitwise Operators
530    /// "x xor y" → bitwise XOR (`^`)
531    Xor,
532    /// "x shifted left/right by y" → bit shift (`<<`/`>>`)
533    Shifted,
534
535    // Arithmetic Operators
536    Plus,
537    Minus,
538    Star,
539    Slash,
540    Percent,  // Modulo operator
541
542    // Comparison Operators
543    /// `<`
544    Lt,
545    /// `>`
546    Gt,
547    /// `<=`
548    LtEq,
549    /// `>=`
550    GtEq,
551    /// `==`
552    EqEq,
553    /// `!=`
554    NotEq,
555
556    /// Arrow for return type syntax: `->`
557    Arrow,
558
559    /// Assignment operator `=` for `identifier = value` syntax
560    Assign,
561
562    /// Mutability keyword `mut` for explicit mutable declarations
563    Mut,
564
565    /// Generic identifier (for equals-style assignment)
566    Identifier,
567
568    EOF,
569}
570
571#[derive(Debug, Clone)]
572pub struct Token {
573    pub kind: TokenType,
574    pub lexeme: Symbol,
575    pub span: Span,
576}
577
578impl Token {
579    pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
580        Token { kind, lexeme, span }
581    }
582}
583
584impl TokenType {
585    pub const WH_WORDS: &'static [TokenType] = &[
586        TokenType::Who,
587        TokenType::What,
588        TokenType::Where,
589        TokenType::When,
590        TokenType::Why,
591    ];
592
593    pub const MODALS: &'static [TokenType] = &[
594        TokenType::Must,
595        TokenType::Shall,
596        TokenType::Should,
597        TokenType::Can,
598        TokenType::May,
599        TokenType::Cannot,
600        TokenType::Would,
601        TokenType::Could,
602        TokenType::Might,
603    ];
604}
605
606#[cfg(test)]
607mod tests {
608    use super::*;
609
610    #[test]
611    fn span_new_stores_positions() {
612        let span = Span::new(5, 10);
613        assert_eq!(span.start, 5);
614        assert_eq!(span.end, 10);
615    }
616
617    #[test]
618    fn span_default_is_zero() {
619        let span = Span::default();
620        assert_eq!(span.start, 0);
621        assert_eq!(span.end, 0);
622    }
623
624    #[test]
625    fn token_has_span_field() {
626        use logicaffeine_base::Interner;
627        let mut interner = Interner::new();
628        let lexeme = interner.intern("test");
629        let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
630        assert_eq!(token.span.start, 0);
631        assert_eq!(token.span.end, 4);
632    }
633
634    #[test]
635    fn wh_words_contains_all_wh_tokens() {
636        assert_eq!(TokenType::WH_WORDS.len(), 5);
637        assert!(TokenType::WH_WORDS.contains(&TokenType::Who));
638        assert!(TokenType::WH_WORDS.contains(&TokenType::What));
639        assert!(TokenType::WH_WORDS.contains(&TokenType::Where));
640        assert!(TokenType::WH_WORDS.contains(&TokenType::When));
641        assert!(TokenType::WH_WORDS.contains(&TokenType::Why));
642    }
643
644    #[test]
645    fn modals_contains_all_modal_tokens() {
646        assert_eq!(TokenType::MODALS.len(), 9);
647        assert!(TokenType::MODALS.contains(&TokenType::Must));
648        assert!(TokenType::MODALS.contains(&TokenType::Shall));
649        assert!(TokenType::MODALS.contains(&TokenType::Should));
650        assert!(TokenType::MODALS.contains(&TokenType::Can));
651        assert!(TokenType::MODALS.contains(&TokenType::May));
652        assert!(TokenType::MODALS.contains(&TokenType::Cannot));
653        assert!(TokenType::MODALS.contains(&TokenType::Would));
654        assert!(TokenType::MODALS.contains(&TokenType::Could));
655        assert!(TokenType::MODALS.contains(&TokenType::Might));
656    }
657}