-- S-expression parser implementation. with Ada.Text_IO; use Ada.Text_IO; package body Parser is -- Predicates on input characters WhitespaceP : constant array (Character) of Boolean := (' ' | ASCII.Nul | ASCII.HT | ASCII.CR | ASCII.LF => True, others => False); DigitP : constant array (Character) of Boolean := ('0' .. '9' => True, others => False); -- Reserved characters, although '.' and '#' can technically speaking -- be part of symbol names. ReservedP : constant array (Character) of Boolean := ('(' | ')' | '#' | '.' | ''' => True, others => False); C : Character := ' '; I : Long_Integer; -- Given a string, check if all its characters are digits. If so, -- accumulate them in I. procedure Parse_Integer(Str : in MemPtr; Success : out Boolean; I : out Long_Integer) is P : MemPtr := Str; PC : MemPtr; C : Character; Negative : Boolean := False; Result : Long_Integer; begin Result := 0; Success := True; pragma Assert (P /= 0, "Parse_Integer received a NIL string!"); -- Check for leading +/- signs first PC := Get_Car(AMem(P)); C := Get_Char(AMem(PC)); if C = '-' then Negative := True; end if; if C = '-' or C = '+' then P := Get_Cdr(AMem(P)); -- If we don't have other characters after + or -, then this is -- not a number. if P = 0 then Success := False; return; end if; end if; while P /= 0 loop -- Check list and its car. PC := Get_Car(AMem(P)); C := Get_Char(AMem(PC)); -- Do we have a non-digit? if not DigitP(C) then Success := False; exit; end if; -- If we're still in the all-digits game, gather them, hoping -- we get a number. Result := Result * 10 + (Character'Pos(C) - Character'Pos('0')); -- Move on. P := Get_Cdr(AMem(P)); end loop; if Negative then Result := -Result; end if; -- Set the output value if and only if we succeeded. if Success then I := Result; end if; end Parse_Integer; procedure Eat_Whitespace is begin while WhitespaceP(C) loop Get(C); end loop; end Eat_Whitespace; -- Parse a list of characters that may be a symbol or an integer. procedure Parse_Atom(P : out MemPtr) is CharP, TempP : MemPtr; ListP : MemPtr := 0; ListTailP : MemPtr := 0; begin pragma Assert (not ReservedP(C), "Parse_Atom received a reserved character!"); loop -- Get a new char cell Alloc_Char(C, CharP); -- Save old list tail TempP := ListTailP; -- Cons cell to be appended to the list Alloc_Cons(CharP, 0, ListTailP); -- Does the old list tail point to a cons cell? If so, set the -- old cdr to the new list tail, otherwise set the list -- pointer to the tail. if TempP /= 0 then Set_Cdr(AMem(TempP), ListTailP); else ListP := ListTailP; end if; -- Get a new character and verify the exit condition Get(C); exit when WhitespaceP(C); exit when C = '(' or C = ')'; end loop; -- Assign output parameter to our list. P := ListP; end Parse_Atom; -- Parse hash-prepended expression. procedure Parse_Hash(P : out MemPtr; TID : out TokenID) is begin pragma Assert (C = '#', "Parse_Hash does not begin with a hash."); -- We support the following hash-prepended expressions: -- -- . booleans (#t or #f) -- . characters (e.g. #\a, #\b, ...) Get(C); if (C = 't') then -- true Alloc_Bool(True, P); TID := Bool_Token; elsif (C = 'f') then -- false Alloc_Bool(False, P); TID := Bool_Token; elsif (C = '\') then -- char -- XXX should do more elaborate parsing here, e.g. #\space -- etc. Get(C); Alloc_Char(C, P); TID := Char_Token; else -- unknown pragma Assert (False, "Unknown hash expression."); end if; -- Emulate a space for future calls of Parse C := ' '; end Parse_Hash; -- Parse cons objects, i.e. lists and pairs. procedure Parse_Cons(P : out MemPtr) is ListP : MemPtr := 0; ListTailP : MemPtr := 0; TID : TokenID; begin pragma Assert (C = '(', "Parse_Cons should receive an open paren."); -- Emulate a space for the first call to Parse C := ' '; -- Iterate through the list elements and add them to the list. loop declare ElemP, TempP : MemPtr; begin -- Parse current element Parse(ElemP, TID); -- Exit conditions exit when TID = ListE_Token; -- list end exit when TID = ListP_Token; -- pair marker -- Save old list tail TempP := ListTailP; -- Add new element to the list: if the TempP is NIL, then we -- have a fresh list that we can populate with the new -- element. Alloc_Cons(ElemP, 0, ListTailP); if TempP = 0 then ListP := ListTailP; else Set_Cdr(AMem(TempP), ListTailP); end if; end; end loop; -- If we received a pair marker, then we have one more element to -- parse. if TID = ListP_Token then declare ElemP : MemPtr; begin pragma Assert (ListTailP /= 0, "Syntax error parsing pair."); -- Emulate space C := ' '; -- Parse element Parse(ElemP, TID); pragma Assert(TID = Bool_Token or TID = Num_Token or TID = List_Token or TID = Char_Token or TID = Symbol_Token, "Syntax error parsing pair."); -- Point cdr of list tail to element Set_Cdr(AMem(ListTailP), ElemP); end; elsif TID /= ListE_Token then Put_Line("Impossible to get here!"); P := 0; return; end if; -- Set the output and emulate a space for whatever comes next. P := ListP; C := ' '; end Parse_Cons; -- Parse quoted S-expression. procedure Parse_Quoted(P : out MemPtr; TID : out TokenID) is ExprP : MemPtr; begin pragma Assert (C = ''', "Parse_Quoted not given a quote"); -- Emulate a space and parse whatever comes after the quote. C := ' '; Parse(ExprP, TID); -- If the result of Parse is an atomic constant, we return it as -- it is. Otherwise we wrap whatever expr we get in a quote, -- i.e. (quote expr). case TID is when Bool_Token | Num_Token | Char_Token => P := ExprP; when others => -- (expr) Alloc_Cons(ExprP, 0, ExprP); -- (quote expr) Alloc_Cons(Quote_Sym, ExprP, ExprP); -- Assign output P := ExprP; TID := Quoted_Token; end case; end Parse_Quoted; -- Parse any S-expression procedure Parse(P : out MemPtr; TID : out TokenID) is TempTID : TokenID := Error_Token; AtomP : MemPtr; TempP1, TempP2, TempP3 : MemPtr := 0; Is_Number : Boolean; begin P := 0; TID := Error_Token; Eat_Whitespace; -- Not (, ), #, . or ' if not ReservedP(C) then -- atom -- Read atom from stdin Parse_Atom(AtomP); -- Try to parse a number; if we succeed, then return it, else -- return the atom as an interned symbol. Parse_Integer(AtomP, Is_Number, I); if Is_Number then Alloc_Fixnum(I, P); TID := Num_Token; else -- Alloc symbol cell Lookup_Or_Create_Symbol(AtomP, P); TID := Symbol_Token; end if; elsif C = '#' then -- hash token Parse_Hash(P, TID); elsif C = '(' then -- list/pair Parse_Cons(P); -- Instantiate TID TID := List_Token; elsif C = ')' then -- list/pair end TID := ListE_Token; elsif C = '.' then -- pair marker TID := ListP_Token; elsif C = ''' then -- quote marker Parse_Quoted(P, TID); end if; end Parse; end Parser;