logicaffeine_language/token.rs
1//! Token types for the LOGOS lexer and parser.
2//!
3//! This module defines the vocabulary of the LOGOS language at the token level.
4//! Tokens represent the atomic syntactic units produced by the lexer and consumed
5//! by the parser.
6//!
7//! ## Token Categories
8//!
9//! | Category | Examples | Description |
10//! |----------|----------|-------------|
11//! | **Quantifiers** | every, some, no | Bind variables over domains |
12//! | **Determiners** | the, a, this | Select referents |
13//! | **Nouns** | cat, philosopher | Predicates over individuals |
14//! | **Verbs** | runs, loves | Relations between arguments |
15//! | **Adjectives** | red, happy | Modify noun denotations |
16//! | **Connectives** | and, or, implies | Combine propositions |
17//! | **Pronouns** | he, she, it | Resolve to antecedents |
18//!
19//! ## Block Types
20//!
21//! LOGOS uses markdown-style block headers for structured documents:
22//!
23//! - `## Theorem`: Declares a proposition to be proved
24//! - `## Proof`: Contains the proof steps
25//! - `## Definition`: Introduces new terminology
26//! - `## Main`: Program entry point
27
28use logicaffeine_base::Symbol;
29use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};
30
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
32pub struct Span {
33 pub start: usize,
34 pub end: usize,
35}
36
37impl Span {
38 pub fn new(start: usize, end: usize) -> Self {
39 Self { start, end }
40 }
41}
42
43#[derive(Debug, Clone, Copy, PartialEq, Eq)]
44pub enum PresupKind {
45 Stop,
46 Start,
47 Regret,
48 Continue,
49 Realize,
50 Know,
51}
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq)]
54pub enum FocusKind {
55 Only,
56 Even,
57 Just,
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq)]
61pub enum MeasureKind {
62 Much,
63 Little,
64}
65
66/// Calendar time units for Span expressions.
67///
68/// These represent variable-length calendar durations, as opposed to
69/// fixed SI time units (ns, ms, s, etc.) used in Duration.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub enum CalendarUnit {
72 Day,
73 Week,
74 Month,
75 Year,
76}
77
78/// Document structure block type markers.
79///
80/// LOGOS uses markdown-style `## Header` syntax to delimit different
81/// sections of a program or proof document.
82#[derive(Debug, Clone, Copy, PartialEq, Eq)]
83pub enum BlockType {
84 /// `## Theorem` - Declares a proposition to be proved.
85 Theorem,
86 /// `## Main` - Program entry point for imperative code.
87 Main,
88 /// `## Definition` - Introduces new terminology or type definitions.
89 Definition,
90 /// `## Proof` - Contains proof steps for a theorem.
91 Proof,
92 /// `## Example` - Illustrative examples.
93 Example,
94 /// `## Logic` - Direct logical notation input.
95 Logic,
96 /// `## Note` - Explanatory documentation.
97 Note,
98 /// `## To` - Function definition block.
99 Function,
100 /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
101 TypeDef,
102 /// `## Policy` - Security policy rule definitions.
103 Policy,
104 /// `## Requires` - External crate dependency declarations.
105 Requires,
106 /// `## Hardware` - Signal declarations for hardware verification.
107 Hardware,
108 /// `## Property` - Temporal assertions for hardware verification.
109 Property,
110 /// `## No` - Optimization annotation (followed by Memo, TCO, Peephole, Borrow, or Optimize).
111 No,
112}
113
114#[derive(Debug, Clone, PartialEq, Eq)]
115pub enum TokenType {
116 // Document Structure
117 BlockHeader { block_type: BlockType },
118
119 // Quantifiers
120 All,
121 No,
122 Some,
123 Any,
124 Both, // Correlative conjunction marker: "both X and Y"
125 Most,
126 Few,
127 Many,
128 Cardinal(u32),
129 AtLeast(u32),
130 AtMost(u32),
131
132 // Negative Polarity Items (NPIs)
133 Anything,
134 Anyone,
135 Nothing,
136 Nobody,
137 NoOne,
138 Nowhere,
139 Ever,
140 Never,
141
142 // Logical Connectives
143 And,
144 Or,
145 If,
146 Then,
147 Not,
148 Iff,
149 Because,
150 /// Temporal binary connective: "P until Q"
151 Until,
152 /// Temporal binary connective: "P release Q" (dual of Until)
153 Release,
154 /// Temporal binary connective: "P weak-until Q" (Until or Always)
155 WeakUntil,
156 /// Compiler-generated implication (e.g., quantifier restrictions from Kripke lowering).
157 /// Distinguished from `If` which represents user-written conditionals.
158 /// This separation gives downstream passes (KG extraction, SVA synthesis)
159 /// irrefutable provenance: `If` = user intent, `Implies` = compiler glue.
160 Implies,
161
162 // Modal Operators
163 Must,
164 Shall,
165 Should,
166 Can,
167 May,
168 Cannot,
169 Would,
170 Could,
171 Might,
172 Had,
173
174 // Imperative Statement Keywords
175 Let,
176 Set,
177 Return,
178 /// Exits the innermost while loop: `Break.`
179 Break,
180 Be,
181 While,
182 Repeat,
183 For,
184 In,
185 From,
186 Assert,
187 /// Documented assertion with justification string.
188 Trust,
189 Otherwise,
190 /// Alias for `Otherwise` - Pythonic else clause
191 Else,
192 /// Python-style else-if shorthand
193 Elif,
194 Call,
195 /// Constructor keyword for struct instantiation.
196 New,
197 /// Sum type definition keyword.
198 Either,
199 /// Pattern matching statement keyword.
200 Inspect,
201 /// Native function modifier for FFI bindings.
202 Native,
203 /// Escape hatch header keyword: "Escape to Rust:"
204 Escape,
205 /// Raw code block captured verbatim from an escape hatch body.
206 /// The Symbol holds the interned raw foreign code (indentation-stripped).
207 EscapeBlock(Symbol),
208
209 // Theorem Keywords
210 /// Premise marker in theorem blocks.
211 Given,
212 /// Goal marker in theorem blocks.
213 Prove,
214 /// Automatic proof strategy directive.
215 Auto,
216
217 // IO Keywords
218 /// "Read input from..."
219 Read,
220 /// "Write x to file..."
221 Write,
222 /// "...from the console"
223 Console,
224 /// "...from file..." or "...to file..."
225 File,
226
227 // Ownership Keywords (Move/Borrow Semantics)
228 /// Move ownership: "Give x to processor"
229 Give,
230 /// Immutable borrow: "Show x to console"
231 Show,
232
233 // Collection Operations
234 /// "Push x to items"
235 Push,
236 /// "Pop from items"
237 Pop,
238 /// "copy of slice" → slice.to_vec()
239 Copy,
240 /// "items 1 through 3" → inclusive slice
241 Through,
242 /// "length of items" → items.len()
243 Length,
244 /// "items at i" → `items[i]`
245 At,
246
247 // Set Operations
248 /// "Add x to set" (insert)
249 Add,
250 /// "Remove x from set"
251 Remove,
252 /// "set contains x"
253 Contains,
254 /// "a union b"
255 Union,
256 /// "a intersection b"
257 Intersection,
258
259 // Memory Management (Zones)
260 /// "Inside a new zone..."
261 Inside,
262 /// "...zone called..."
263 Zone,
264 /// "...called 'Scratch'"
265 Called,
266 /// "...of size 1 MB"
267 Size,
268 /// "...mapped from 'file.bin'"
269 Mapped,
270
271 // Structured Concurrency
272 /// "Attempt all of the following:" → concurrent (async, I/O-bound)
273 Attempt,
274 /// "the following"
275 Following,
276 /// "Simultaneously:" → parallel (CPU-bound)
277 Simultaneously,
278
279 // Agent System (Actor Model)
280 /// "Spawn a Worker called 'w1'" → create agent
281 Spawn,
282 /// "Send Ping to 'agent'" → send message to agent
283 Send,
284 /// "Await response from 'agent' into result" → receive message
285 Await,
286
287 // Serialization
288 /// "A Message is Portable and has:" → serde derives
289 Portable,
290
291 // Sipping Protocol
292 /// "the manifest of Zone" → FileSipper manifest
293 Manifest,
294 /// "the chunk at N in Zone" → FileSipper chunk
295 Chunk,
296
297 // CRDT Keywords
298 /// "A Counter is Shared and has:" → CRDT struct
299 Shared,
300 /// "Merge remote into local" → CRDT merge
301 Merge,
302 /// "Increase x's count by 10" → GCounter increment
303 Increase,
304
305 // Extended CRDT Keywords
306 /// "Decrease x's count by 5" → PNCounter decrement
307 Decrease,
308 /// "which is a Tally" → PNCounter type
309 Tally,
310 /// "which is a SharedSet of T" → ORSet type
311 SharedSet,
312 /// "which is a SharedSequence of T" → RGA type
313 SharedSequence,
314 /// "which is a CollaborativeSequence of T" → YATA type
315 CollaborativeSequence,
316 /// "which is a SharedMap from K to V" → ORMap type
317 SharedMap,
318 /// "which is a Divergent T" → MVRegister type
319 Divergent,
320 /// "Append x to seq" → RGA append
321 Append,
322 /// "Resolve x to value" → MVRegister resolve
323 Resolve,
324 /// "(RemoveWins)" → ORSet bias
325 RemoveWins,
326 /// "(AddWins)" → ORSet bias (default)
327 AddWins,
328 /// "(YATA)" → Sequence algorithm
329 YATA,
330 /// "x's values" → MVRegister values accessor
331 Values,
332
333 // Security Keywords
334 /// "Check that user is admin" → mandatory runtime guard
335 Check,
336
337 // P2P Networking Keywords
338 /// "Listen on \[addr\]" → bind to network address
339 Listen,
340 /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
341 NetConnect,
342 /// "Sleep N." → pause execution for N milliseconds
343 Sleep,
344
345 // GossipSub Keywords
346 /// "Sync x on 'topic'" → automatic CRDT replication
347 Sync,
348
349 // Persistence Keywords
350 /// "Mount x at \[path\]" → load/create persistent CRDT from journal
351 Mount,
352 /// "Persistent Counter" → type wrapped with journaling
353 Persistent,
354 /// "x combined with y" → string concatenation
355 Combined,
356
357 // Go-like Concurrency Keywords
358 /// "Launch a task to..." → spawn green thread
359 Launch,
360 /// "a task" → identifier for task context
361 Task,
362 /// "Pipe of Type" → channel creation
363 Pipe,
364 /// "Receive from pipe" → recv from channel
365 Receive,
366 /// "Stop handle" → abort task
367 Stop,
368 /// "Try to send/receive" → non-blocking variant
369 Try,
370 /// "Send value into pipe" → channel send
371 Into,
372 /// "Await the first of:" → select statement
373 First,
374 /// "After N seconds:" → timeout branch
375 After,
376
377 // Block Scoping
378 Colon,
379 Indent,
380 Dedent,
381 Newline,
382
383 // Content Words
384 Noun(Symbol),
385 Adjective(Symbol),
386 NonIntersectiveAdjective(Symbol),
387 Adverb(Symbol),
388 ScopalAdverb(Symbol),
389 TemporalAdverb(Symbol),
390 Verb {
391 lemma: Symbol,
392 time: Time,
393 aspect: Aspect,
394 class: VerbClass,
395 },
396 ProperName(Symbol),
397
398 /// Lexically ambiguous token (e.g., "fish" as noun or verb).
399 ///
400 /// The parser tries the primary interpretation first, then alternatives
401 /// if parsing fails. Used for parse forest generation.
402 Ambiguous {
403 primary: Box<TokenType>,
404 alternatives: Vec<TokenType>,
405 },
406
407 // Speech Acts (Performatives)
408 Performative(Symbol),
409 Exclamation,
410
411 // Articles (Definiteness)
412 Article(Definiteness),
413
414 // Temporal Auxiliaries
415 Auxiliary(Time),
416
417 // Copula & Functional
418 Is,
419 Are,
420 Was,
421 Were,
422 That,
423 Who,
424 What,
425 Where,
426 When,
427 Why,
428 Does,
429 Do,
430
431 // Identity & Reflexive (FOL)
432 Identity,
433 Equals,
434 Reflexive,
435 Reciprocal,
436 /// Pairwise list coordination: "A and B respectively love C and D"
437 Respectively,
438
439 // Pronouns (Discourse)
440 Pronoun {
441 gender: Gender,
442 number: Number,
443 case: Case,
444 },
445
446 // Prepositions (for N-ary relations)
447 Preposition(Symbol),
448
449 // Phrasal Verb Particles (up, down, out, in, off, on, away)
450 Particle(Symbol),
451
452 // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
453 Comparative(Symbol),
454 Superlative(Symbol),
455 Than,
456
457 // Control Verbs (Chomsky's Control Theory)
458 To,
459
460 // Presupposition Triggers (Austin/Strawson)
461 PresupTrigger(PresupKind),
462
463 // Focus Particles (Rooth)
464 Focus(FocusKind),
465
466 // Mass Noun Measure
467 Measure(MeasureKind),
468
469 // Numeric Literals (prover-ready: stores raw string for symbolic math)
470 Number(Symbol),
471
472 /// Duration literal with SI suffix: 500ms, 2s, 50ns
473 /// Stores the value normalized to nanoseconds and preserves the original unit.
474 DurationLiteral {
475 nanos: i64,
476 original_unit: Symbol,
477 },
478
479 /// Date literal in ISO-8601 format: 2026-05-20
480 /// Stores days since Unix epoch (1970-01-01).
481 DateLiteral {
482 days: i32,
483 },
484
485 /// Time-of-day literal: 4pm, 9:30am, noon, midnight
486 /// Stores nanoseconds from midnight (00:00:00).
487 TimeLiteral {
488 nanos_from_midnight: i64,
489 },
490
491 /// Calendar time unit word: day, week, month, year (or plurals)
492 /// Used in Span expressions like "3 days" or "2 months and 5 days"
493 CalendarUnit(CalendarUnit),
494
495 /// Postfix operator for relative past time: "3 days ago"
496 Ago,
497
498 /// Postfix operator for relative future time: "3 days hence"
499 Hence,
500
501 /// Binary operator for span subtraction from a date: "3 days before 2026-05-20"
502 Before,
503
504 /// String literal: `"hello world"`
505 StringLiteral(Symbol),
506
507 /// Interpolated string literal: `"Hello, {name}!"`
508 /// Contains raw content with {} holes preserved
509 InterpolatedString(Symbol),
510
511 // Character literal: `x` (backtick syntax)
512 CharLiteral(Symbol),
513
514 // Index Access (1-indexed)
515 Item,
516 Items,
517
518 // Possession (Genitive Case)
519 Possessive,
520
521 // Punctuation
522 LParen,
523 RParen,
524 LBracket,
525 RBracket,
526 Comma,
527 Period,
528
529 // Bitwise Operators
530 /// "x xor y" → bitwise XOR (`^`)
531 Xor,
532 /// "x shifted left/right by y" → bit shift (`<<`/`>>`)
533 Shifted,
534
535 // Arithmetic Operators
536 Plus,
537 Minus,
538 Star,
539 Slash,
540 Percent, // Modulo operator
541
542 // Comparison Operators
543 /// `<`
544 Lt,
545 /// `>`
546 Gt,
547 /// `<=`
548 LtEq,
549 /// `>=`
550 GtEq,
551 /// `==`
552 EqEq,
553 /// `!=`
554 NotEq,
555
556 /// Arrow for return type syntax: `->`
557 Arrow,
558
559 /// Assignment operator `=` for `identifier = value` syntax
560 Assign,
561
562 /// Mutability keyword `mut` for explicit mutable declarations
563 Mut,
564
565 /// Generic identifier (for equals-style assignment)
566 Identifier,
567
568 EOF,
569}
570
571#[derive(Debug, Clone)]
572pub struct Token {
573 pub kind: TokenType,
574 pub lexeme: Symbol,
575 pub span: Span,
576}
577
578impl Token {
579 pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
580 Token { kind, lexeme, span }
581 }
582}
583
584impl TokenType {
585 pub const WH_WORDS: &'static [TokenType] = &[
586 TokenType::Who,
587 TokenType::What,
588 TokenType::Where,
589 TokenType::When,
590 TokenType::Why,
591 ];
592
593 pub const MODALS: &'static [TokenType] = &[
594 TokenType::Must,
595 TokenType::Shall,
596 TokenType::Should,
597 TokenType::Can,
598 TokenType::May,
599 TokenType::Cannot,
600 TokenType::Would,
601 TokenType::Could,
602 TokenType::Might,
603 ];
604}
605
606#[cfg(test)]
607mod tests {
608 use super::*;
609
610 #[test]
611 fn span_new_stores_positions() {
612 let span = Span::new(5, 10);
613 assert_eq!(span.start, 5);
614 assert_eq!(span.end, 10);
615 }
616
617 #[test]
618 fn span_default_is_zero() {
619 let span = Span::default();
620 assert_eq!(span.start, 0);
621 assert_eq!(span.end, 0);
622 }
623
624 #[test]
625 fn token_has_span_field() {
626 use logicaffeine_base::Interner;
627 let mut interner = Interner::new();
628 let lexeme = interner.intern("test");
629 let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
630 assert_eq!(token.span.start, 0);
631 assert_eq!(token.span.end, 4);
632 }
633
634 #[test]
635 fn wh_words_contains_all_wh_tokens() {
636 assert_eq!(TokenType::WH_WORDS.len(), 5);
637 assert!(TokenType::WH_WORDS.contains(&TokenType::Who));
638 assert!(TokenType::WH_WORDS.contains(&TokenType::What));
639 assert!(TokenType::WH_WORDS.contains(&TokenType::Where));
640 assert!(TokenType::WH_WORDS.contains(&TokenType::When));
641 assert!(TokenType::WH_WORDS.contains(&TokenType::Why));
642 }
643
644 #[test]
645 fn modals_contains_all_modal_tokens() {
646 assert_eq!(TokenType::MODALS.len(), 9);
647 assert!(TokenType::MODALS.contains(&TokenType::Must));
648 assert!(TokenType::MODALS.contains(&TokenType::Shall));
649 assert!(TokenType::MODALS.contains(&TokenType::Should));
650 assert!(TokenType::MODALS.contains(&TokenType::Can));
651 assert!(TokenType::MODALS.contains(&TokenType::May));
652 assert!(TokenType::MODALS.contains(&TokenType::Cannot));
653 assert!(TokenType::MODALS.contains(&TokenType::Would));
654 assert!(TokenType::MODALS.contains(&TokenType::Could));
655 assert!(TokenType::MODALS.contains(&TokenType::Might));
656 }
657}