raw
adalisp_genesis         1 -- S-expression parser implementation.
adalisp_genesis 2 with Ada.Text_IO; use Ada.Text_IO;
adalisp_genesis 3
adalisp_genesis 4 package body Parser is
adalisp_genesis 5 -- Predicates on input characters
adalisp_genesis 6 WhitespaceP : constant array (Character) of Boolean :=
adalisp_genesis 7 (' ' | ASCII.Nul | ASCII.HT | ASCII.CR | ASCII.LF => True, others => False);
adalisp_genesis 8 DigitP : constant array (Character) of Boolean :=
adalisp_genesis 9 ('0' .. '9' => True, others => False);
adalisp_genesis 10
adalisp_genesis 11 -- Reserved characters, although '.' and '#' can technically speaking
adalisp_genesis 12 -- be part of symbol names.
adalisp_genesis 13 ReservedP : constant array (Character) of Boolean :=
adalisp_genesis 14 ('(' | ')' | '#' | '.' | ''' => True, others => False);
adalisp_genesis 15
adalisp_genesis 16 C : Character := ' ';
adalisp_genesis 17 I : Long_Integer;
adalisp_genesis 18
adalisp_genesis 19 -- Given a string, check if all its characters are digits. If so,
adalisp_genesis 20 -- accumulate them in I.
adalisp_genesis 21 procedure Parse_Integer(Str : in MemPtr;
adalisp_genesis 22 Success : out Boolean;
adalisp_genesis 23 I : out Long_Integer) is
adalisp_genesis 24 P : MemPtr := Str;
adalisp_genesis 25 PC : MemPtr;
adalisp_genesis 26 C : Character;
adalisp_genesis 27 Negative : Boolean := False;
adalisp_genesis 28 Result : Long_Integer;
adalisp_genesis 29 begin
adalisp_genesis 30 Result := 0;
adalisp_genesis 31 Success := True;
adalisp_genesis 32
adalisp_genesis 33 pragma Assert (P /= 0, "Parse_Integer received a NIL string!");
adalisp_genesis 34
adalisp_genesis 35 -- Check for leading +/- signs first
adalisp_genesis 36 PC := Get_Car(AMem(P));
adalisp_genesis 37 C := Get_Char(AMem(PC));
adalisp_genesis 38 if C = '-' then
adalisp_genesis 39 Negative := True;
adalisp_genesis 40 end if;
adalisp_genesis 41 if C = '-' or C = '+' then
adalisp_genesis 42 P := Get_Cdr(AMem(P));
adalisp_genesis 43 -- If we don't have other characters after + or -, then this is
adalisp_genesis 44 -- not a number.
adalisp_genesis 45 if P = 0 then
adalisp_genesis 46 Success := False;
adalisp_genesis 47 return;
adalisp_genesis 48 end if;
adalisp_genesis 49 end if;
adalisp_genesis 50
adalisp_genesis 51 while P /= 0 loop
adalisp_genesis 52 -- Check list and its car.
adalisp_genesis 53 PC := Get_Car(AMem(P));
adalisp_genesis 54 C := Get_Char(AMem(PC));
adalisp_genesis 55
adalisp_genesis 56 -- Do we have a non-digit?
adalisp_genesis 57 if not DigitP(C) then
adalisp_genesis 58 Success := False;
adalisp_genesis 59 exit;
adalisp_genesis 60 end if;
adalisp_genesis 61
adalisp_genesis 62 -- If we're still in the all-digits game, gather them, hoping
adalisp_genesis 63 -- we get a number.
adalisp_genesis 64 Result := Result * 10 + (Character'Pos(C) - Character'Pos('0'));
adalisp_genesis 65
adalisp_genesis 66 -- Move on.
adalisp_genesis 67 P := Get_Cdr(AMem(P));
adalisp_genesis 68 end loop;
adalisp_genesis 69
adalisp_genesis 70 if Negative then
adalisp_genesis 71 Result := -Result;
adalisp_genesis 72 end if;
adalisp_genesis 73
adalisp_genesis 74 -- Set the output value if and only if we succeeded.
adalisp_genesis 75 if Success then
adalisp_genesis 76 I := Result;
adalisp_genesis 77 end if;
adalisp_genesis 78 end Parse_Integer;
adalisp_genesis 79
adalisp_genesis 80 procedure Eat_Whitespace is
adalisp_genesis 81 begin
adalisp_genesis 82 while WhitespaceP(C) loop
adalisp_genesis 83 Get(C);
adalisp_genesis 84 end loop;
adalisp_genesis 85 end Eat_Whitespace;
adalisp_genesis 86
adalisp_genesis 87 -- Parse a list of characters that may be a symbol or an integer.
adalisp_genesis 88 procedure Parse_Atom(P : out MemPtr) is
adalisp_genesis 89 CharP, TempP : MemPtr;
adalisp_genesis 90 ListP : MemPtr := 0;
adalisp_genesis 91 ListTailP : MemPtr := 0;
adalisp_genesis 92 begin
adalisp_genesis 93 pragma Assert (not ReservedP(C),
adalisp_genesis 94 "Parse_Atom received a reserved character!");
adalisp_genesis 95
adalisp_genesis 96 loop
adalisp_genesis 97 -- Get a new char cell
adalisp_genesis 98 Alloc_Char(C, CharP);
adalisp_genesis 99 -- Save old list tail
adalisp_genesis 100 TempP := ListTailP;
adalisp_genesis 101 -- Cons cell to be appended to the list
adalisp_genesis 102 Alloc_Cons(CharP, 0, ListTailP);
adalisp_genesis 103
adalisp_genesis 104 -- Does the old list tail point to a cons cell? If so, set the
adalisp_genesis 105 -- old cdr to the new list tail, otherwise set the list
adalisp_genesis 106 -- pointer to the tail.
adalisp_genesis 107 if TempP /= 0 then
adalisp_genesis 108 Set_Cdr(AMem(TempP), ListTailP);
adalisp_genesis 109 else
adalisp_genesis 110 ListP := ListTailP;
adalisp_genesis 111 end if;
adalisp_genesis 112
adalisp_genesis 113 -- Get a new character and verify the exit condition
adalisp_genesis 114 Get(C);
adalisp_genesis 115 exit when WhitespaceP(C);
adalisp_genesis 116 exit when C = '(' or C = ')';
adalisp_genesis 117 end loop;
adalisp_genesis 118 -- Assign output parameter to our list.
adalisp_genesis 119 P := ListP;
adalisp_genesis 120 end Parse_Atom;
adalisp_genesis 121
adalisp_genesis 122 -- Parse hash-prepended expression.
adalisp_genesis 123 procedure Parse_Hash(P : out MemPtr; TID : out TokenID) is
adalisp_genesis 124 begin
adalisp_genesis 125 pragma Assert (C = '#',
adalisp_genesis 126 "Parse_Hash does not begin with a hash.");
adalisp_genesis 127
adalisp_genesis 128 -- We support the following hash-prepended expressions:
adalisp_genesis 129 --
adalisp_genesis 130 -- . booleans (#t or #f)
adalisp_genesis 131 -- . characters (e.g. #\a, #\b, ...)
adalisp_genesis 132
adalisp_genesis 133 Get(C);
adalisp_genesis 134
adalisp_genesis 135 if (C = 't') then -- true
adalisp_genesis 136 Alloc_Bool(True, P);
adalisp_genesis 137 TID := Bool_Token;
adalisp_genesis 138 elsif (C = 'f') then -- false
adalisp_genesis 139 Alloc_Bool(False, P);
adalisp_genesis 140 TID := Bool_Token;
adalisp_genesis 141 elsif (C = '\') then -- char
adalisp_genesis 142 -- XXX should do more elaborate parsing here, e.g. #\space
adalisp_genesis 143 -- etc.
adalisp_genesis 144 Get(C);
adalisp_genesis 145 Alloc_Char(C, P);
adalisp_genesis 146 TID := Char_Token;
adalisp_genesis 147 else -- unknown
adalisp_genesis 148 pragma Assert (False, "Unknown hash expression.");
adalisp_genesis 149 end if;
adalisp_genesis 150
adalisp_genesis 151 -- Emulate a space for future calls of Parse
adalisp_genesis 152 C := ' ';
adalisp_genesis 153 end Parse_Hash;
adalisp_genesis 154
adalisp_genesis 155 -- Parse cons objects, i.e. lists and pairs.
adalisp_genesis 156 procedure Parse_Cons(P : out MemPtr) is
adalisp_genesis 157 ListP : MemPtr := 0;
adalisp_genesis 158 ListTailP : MemPtr := 0;
adalisp_genesis 159 TID : TokenID;
adalisp_genesis 160 begin
adalisp_genesis 161 pragma Assert (C = '(', "Parse_Cons should receive an open paren.");
adalisp_genesis 162
adalisp_genesis 163 -- Emulate a space for the first call to Parse
adalisp_genesis 164 C := ' ';
adalisp_genesis 165
adalisp_genesis 166 -- Iterate through the list elements and add them to the list.
adalisp_genesis 167 loop
adalisp_genesis 168 declare
adalisp_genesis 169 ElemP, TempP : MemPtr;
adalisp_genesis 170 begin
adalisp_genesis 171 -- Parse current element
adalisp_genesis 172 Parse(ElemP, TID);
adalisp_genesis 173
adalisp_genesis 174 -- Exit conditions
adalisp_genesis 175 exit when TID = ListE_Token; -- list end
adalisp_genesis 176 exit when TID = ListP_Token; -- pair marker
adalisp_genesis 177
adalisp_genesis 178 -- Save old list tail
adalisp_genesis 179 TempP := ListTailP;
adalisp_genesis 180 -- Add new element to the list: if the TempP is NIL, then we
adalisp_genesis 181 -- have a fresh list that we can populate with the new
adalisp_genesis 182 -- element.
adalisp_genesis 183 Alloc_Cons(ElemP, 0, ListTailP);
adalisp_genesis 184 if TempP = 0 then
adalisp_genesis 185 ListP := ListTailP;
adalisp_genesis 186 else
adalisp_genesis 187 Set_Cdr(AMem(TempP), ListTailP);
adalisp_genesis 188 end if;
adalisp_genesis 189 end;
adalisp_genesis 190 end loop;
adalisp_genesis 191
adalisp_genesis 192 -- If we received a pair marker, then we have one more element to
adalisp_genesis 193 -- parse.
adalisp_genesis 194 if TID = ListP_Token then
adalisp_genesis 195 declare
adalisp_genesis 196 ElemP : MemPtr;
adalisp_genesis 197 begin
adalisp_genesis 198 pragma Assert (ListTailP /= 0, "Syntax error parsing pair.");
adalisp_genesis 199 -- Emulate space
adalisp_genesis 200 C := ' ';
adalisp_genesis 201 -- Parse element
adalisp_genesis 202 Parse(ElemP, TID);
adalisp_genesis 203
adalisp_genesis 204 pragma Assert(TID = Bool_Token or TID = Num_Token or
adalisp_genesis 205 TID = List_Token or TID = Char_Token or
adalisp_genesis 206 TID = Symbol_Token,
adalisp_genesis 207 "Syntax error parsing pair.");
adalisp_genesis 208 -- Point cdr of list tail to element
adalisp_genesis 209 Set_Cdr(AMem(ListTailP), ElemP);
adalisp_genesis 210 end;
adalisp_genesis 211 elsif TID /= ListE_Token then
adalisp_genesis 212 Put_Line("Impossible to get here!");
adalisp_genesis 213 P := 0;
adalisp_genesis 214 return;
adalisp_genesis 215 end if;
adalisp_genesis 216
adalisp_genesis 217 -- Set the output and emulate a space for whatever comes next.
adalisp_genesis 218 P := ListP;
adalisp_genesis 219 C := ' ';
adalisp_genesis 220 end Parse_Cons;
adalisp_genesis 221
adalisp_genesis 222 -- Parse quoted S-expression.
adalisp_genesis 223 procedure Parse_Quoted(P : out MemPtr; TID : out TokenID) is
adalisp_genesis 224 ExprP : MemPtr;
adalisp_genesis 225 begin
adalisp_genesis 226 pragma Assert (C = ''', "Parse_Quoted not given a quote");
adalisp_genesis 227
adalisp_genesis 228 -- Emulate a space and parse whatever comes after the quote.
adalisp_genesis 229 C := ' ';
adalisp_genesis 230 Parse(ExprP, TID);
adalisp_genesis 231
adalisp_genesis 232 -- If the result of Parse is an atomic constant, we return it as
adalisp_genesis 233 -- it is. Otherwise we wrap whatever expr we get in a quote,
adalisp_genesis 234 -- i.e. (quote expr).
adalisp_genesis 235 case TID is
adalisp_genesis 236 when Bool_Token | Num_Token | Char_Token =>
adalisp_genesis 237 P := ExprP;
adalisp_genesis 238 when others =>
adalisp_genesis 239 -- (expr)
adalisp_genesis 240 Alloc_Cons(ExprP, 0, ExprP);
adalisp_genesis 241 -- (quote expr)
adalisp_genesis 242 Alloc_Cons(Quote_Sym, ExprP, ExprP);
adalisp_genesis 243 -- Assign output
adalisp_genesis 244 P := ExprP;
adalisp_genesis 245 TID := Quoted_Token;
adalisp_genesis 246 end case;
adalisp_genesis 247 end Parse_Quoted;
adalisp_genesis 248
adalisp_genesis 249 -- Parse any S-expression
adalisp_genesis 250 procedure Parse(P : out MemPtr; TID : out TokenID) is
adalisp_genesis 251 TempTID : TokenID := Error_Token;
adalisp_genesis 252 AtomP : MemPtr;
adalisp_genesis 253 TempP1, TempP2, TempP3 : MemPtr := 0;
adalisp_genesis 254 Is_Number : Boolean;
adalisp_genesis 255 begin
adalisp_genesis 256 P := 0;
adalisp_genesis 257 TID := Error_Token;
adalisp_genesis 258
adalisp_genesis 259 Eat_Whitespace;
adalisp_genesis 260
adalisp_genesis 261 -- Not (, ), #, . or '
adalisp_genesis 262 if not ReservedP(C) then -- atom
adalisp_genesis 263 -- Read atom from stdin
adalisp_genesis 264 Parse_Atom(AtomP);
adalisp_genesis 265
adalisp_genesis 266 -- Try to parse a number; if we succeed, then return it, else
adalisp_genesis 267 -- return the atom as an interned symbol.
adalisp_genesis 268 Parse_Integer(AtomP, Is_Number, I);
adalisp_genesis 269 if Is_Number then
adalisp_genesis 270 Alloc_Fixnum(I, P);
adalisp_genesis 271 TID := Num_Token;
adalisp_genesis 272 else
adalisp_genesis 273 -- Alloc symbol cell
adalisp_genesis 274 Lookup_Or_Create_Symbol(AtomP, P);
adalisp_genesis 275 TID := Symbol_Token;
adalisp_genesis 276 end if;
adalisp_genesis 277 elsif C = '#' then -- hash token
adalisp_genesis 278 Parse_Hash(P, TID);
adalisp_genesis 279 elsif C = '(' then -- list/pair
adalisp_genesis 280 Parse_Cons(P);
adalisp_genesis 281 -- Instantiate TID
adalisp_genesis 282 TID := List_Token;
adalisp_genesis 283 elsif C = ')' then -- list/pair end
adalisp_genesis 284 TID := ListE_Token;
adalisp_genesis 285 elsif C = '.' then -- pair marker
adalisp_genesis 286 TID := ListP_Token;
adalisp_genesis 287 elsif C = ''' then -- quote marker
adalisp_genesis 288 Parse_Quoted(P, TID);
adalisp_genesis 289 end if;
adalisp_genesis 290 end Parse;
adalisp_genesis 291 end Parser;