An introduction to Functional Programming in F# -- Part 3: Interpreters
Last time, we developed a parser for a simple LISP style language; now we develop from the abstract syntax tree the mechanism whereby we can actually evaluate expressions (programs) written in it.
Topics covered in the series
- Introduction to Functional Programming via circuits
- Lexing and Parsing
- Interpreters (this part)
- Simple ‘Functional’ Distributed/Concurrent Programming
- Non-Functional bits - Assignment, Exceptions, Objects, Libraries, etc.
- Advanced stuff - Continuations, Monads, Generative Programming (Language Based programming), etc.
A simple language
We define the behaviour of our language on top of the abstract syntax thus:
Expr = Example
(LET list_of_dotted_pairs EXPR) | (Let ((x . 1) (y. 2)) (Add x y))
(IF EXPR EXPR EXPR) | (IF (EQ x y) x (CONS x y))
(LAMBDA list_of_variables EXPR) | (LAMBDA (x y) (ADD x y 1))
(REC variable list_of_variables EXPR) | (REC F (N) (IF (EQ x 0) 1 (MULT N (F (SUB N 1)))))
(QUOTE EXPR) | (QUOTE (1 2 3))
(EVAL EXPR) | (EVAL (QUOTE (ADD 1 2)))
(EVAL EXPR environment_expression) | (EVAL (QUOTE (ADD X 1)) (QUOTE ((X . 2))))
(EXPR EXPR*) | (ADD (LENGTH L) (LENGTH J) 3)
SYMBOL | X
NUMBER 3
where the coloured background denotes special forms (keyword related constructs) in the language, and the rest are call-by-value expressions. The lower-case descriptive names represent the following expansions
list_of_dotted_pairs = ((EXPR . EXPR)*)
list_of_variables = ( SYMBOL*)
variable = SYMBOL
environment_expression = EXPR
In code, we extend the algebraic type we used last time with the notion of a function
type SEXPR = SYMBOL of string | NUMBER of int | NIL | CONS of (SEXPR * SEXPR) | COMPUTATION of COMP | |
and COMP = { func : SEXPR -> SEXPR } ;; |
in the form of the COMP
type; and modify the value of a SYMBOL
to be a string.
Aside: the value of a COMP
type is a record, a series of named fields gathered together. In this case there is one field, and that has a value that is a function mapping one SEXPR
to another.
The Structure of the Evaluator
In pseudocode terms, given an expression as above, an interpreter would evaluate it with a recursive function eval
that looks like this:
To eval
an expression e
in an environment (a construct where symbol definitions live) env
- if
e
isNIL
thenNIL
- else if
e
is a numbern
thenn
- else if
e
is aSYMBOL
then look it up in the environment - else if
e
is a special form then evaluate special form - else if
e
is of form(expr1 expr2 ... exprN)
then(eval expr1 env) (eval expr2 env) ... (eval exprN env)
- else
ERROR
The Structure of the Environment
We introduced the notion of an environment as where symbol definitions live; we express it as a look-up table from symbol to value like this (where we continue to use the list as our standard functional data-structure):
so that structurally the environment is an SEXPR which itself is a list of pairs of SYMBOL * SEXPR.
In code, the evaluator looks like this
let rec eval e x = | |
match x with | |
// Call by value cases | |
NIL -> NIL | |
|(SYMBOL s) -> lookup (SYMBOL s) e | |
|(NUMBER i) -> (NUMBER i) | |
// "IF" special form | |
|CONS((SYMBOL "IF"), b) -> match b with | |
CONS(cond, CONS(thenpart, CONS (elsepart, NIL))) -> | |
match (eval e cond) with | |
| NIL -> (eval e elsepart) | |
| _ -> (eval e thenpart) | |
| _ -> ERROR2 "IF" b | |
// "LET" special form | |
|CONS((SYMBOL "LET"), b) -> match b with | |
CONS(bindings, CONS(body, NIL)) -> | |
let pairs = applySnd (eval e) bindings | |
eval (APPEND pairs e) body | |
| _ -> ERROR2 "LET" b | |
// "LAMBDA" special form | |
|CONS((SYMBOL "LAMBDA"),b) -> match b with | |
CONS(formals, CONS(body, NIL)) -> | |
COMPUTATION {func = fun actuals -> (let pairs = ZIP formals actuals | |
eval (APPEND pairs e) body)} | |
| _ -> ERROR2 "LAMBDA" b | |
// "REC" special form | |
|CONS((SYMBOL "REC"),b) -> match b with | |
CONS(F, CONS(formals, CONS(body, NIL))) -> | |
let rec f = | |
COMPUTATION {func = fun actuals -> (let pairs = ZIP formals actuals | |
eval (APPEND pairs (CONS(CONS(F, f), e))) body)} | |
f | |
| _ -> ERROR2 "REC" b | |
// "QUOTE" special form | |
|CONS((SYMBOL "QUOTE"),CONS(X,NIL)) -> X | |
// EXTRAS | |
|CONS((SYMBOL "ENV"), NIL) -> e | |
|CONS((SYMBOL "EVAL"), CONS(X, NIL)) -> eval NIL (eval NIL X) | |
|CONS((SYMBOL "EVAL"), CONS(X, CONS(Y, NIL))) -> eval (eval NIL Y) (eval NIL X) | |
// Application | |
|CONS(a, b) -> apply (eval e a) (MAP (eval e) b) | |
|x -> ERROR1 x;; |
Note: Unlike our previous examples this doesn't just paste straight into the interpreter and work, as it has dangling references. The full set of new source appears at the end of this post.
The function starts out with the call by value cases from our pseudo-code; their behaviour should be obvious, given a suitable definition of lookup
that walks the environment and returns the matching SEXPR
or a suitable default if not found.
The Special forms
Our special form IF
works like
eval (IF E1 E2 E3) env = if (eval E1 env) then (eval E2 env) else (eval E3 env) |
where we have made the design decision that all values are "truthy" except NIL
. We have to match on our predicate because, as an implementation detail, we can't test functions for equality.
Our special form LET
introduces functions applySnd
which takes the list of dotted pairs represented as SEXPR * SEXPR
, and recursively evaluates the second of each pair, as in
let rec applySnd f l = | |
match l with | |
[] -> [] | |
| (x,y)::xs -> (x, f y) :: (applySnd f xs);; |
and APPEND
which takes a list of SEXPR * SEXPR
and returns that list merges that with the input environment (another list of SEXPR * SEXPR
).
Having generated a new environment by merging all the evaluated bindings, i.e. (APPEND pairs e)
, it then evaluates the body of the expression in that new environment.
Our special form LAMBDA
behaves similarly, creating a function that when executed makes local bindings from the formal parameters to the corresponding values (ZIP
being the standard operation to map a pair of lists to a list of pairs), and uses that to define a new environment for evaluating the body.
The special form REC
returns a self-referencing computation, but is otherwise similar to the LAMBDA
form. Note that f
is not itself a function; this use emphasises that the F# rec
keyword merely brings a variable into scope inside its own definition. so the definition can be recursive.
The special form QUOTE
returns the input expression unevaluated.
The "Extras", or "code is data is code"
The evaluator provides access to the environment from within the language; symbol ENV
yields the environment as a list of dotted pairs.
The symbol EVAL
with one argument evaluates an expression within the current environment for the interpreter, and with two arguments, allows a different environment to be specified. This is how data becomes code -- it is able to cause itself self to execute.
Remaining cases -- Functions and errors
Function application is modelled by taking a fully evaluated list, whose first element is expected to be a COMPUTATION
and applying it to the remainder of the list of arguments.
In the code
apply (eval e a) (MAP (eval e) b) |
MAP
applies the evaluator to each member of the list b
to achieve this fully evaluated state; and apply
looks like
let apply f l = | |
match f with | |
COMPUTATION { func = f' } -> f' l | |
| ERROR CONS(f, l);; |
Built-in functions are represented in this model by providing a standard environment with computations defined.
Anything else that is superficially syntactically valid but matches none of the expected forms results in an error.
Exercises
- Modify the parser from last time to work with the new definition of
SEXPR
- Add Boolean operators
AND, OR, NOT, EQUIV, IMPLIES
to evaluator - Add built in functions to map between SYMBOLS and List of Characters and back again.
- Add
QUASIQUOTE (QQ), UNQUOTE
with the meaning that(QQ (ADD (UNQUOTE X) (UNQUOTE Y) 23))
in an environment whereX
andY
are123
andZ
respectively evaluates to the S-expression(ADD 123 Z 23)
- (Advanced): Change the evaluator to allow deal with interpreted code in the environment e.g.
F
is bound to(INTERPRET E)
inenv
and(F X)
is(eval E env)(eval X env)
. Alternatively extend the type of SEXPR to include anINTERPRETED
constructor and do the same - (Advanced) Change the evaluator to allow special forms to be placed in the environment.
Putting it all together
I haven't copied over the parser from last time (that's the first exercise); but all the missing new bits are here.
type SEXPR = SYMBOL of string | NUMBER of int | NIL | CONS of (SEXPR * SEXPR) | COMPUTATION of COMP | |
and COMP = { func : SEXPR -> SEXPR } ;; | |
// Lexer and Parser go here | |
let EQUAL l1 l2 = | |
let rec tf s1 s2 = match (s1, s2) with | |
(SYMBOL xs, SYMBOL ys) -> xs = ys | |
| (NIL, NIL) -> true | |
| (NUMBER x, NUMBER y) -> x = y | |
| (CONS (a, b), CONS(c, d)) -> (tf a c) && (tf b d) | |
| _ -> false | |
if tf l1 l2 then (SYMBOL "TRUE") else NIL | |
let rec lookup x l = | |
match l with | |
CONS(CONS(a, b), c) -> match EQUAL x a with | |
NIL -> lookup x c | |
| _ -> b | |
| _ -> NIL;; | |
let rec MAP f l = | |
match l with | |
CONS(a, b) -> CONS(f a, MAP f b) | |
| _ -> NIL;; | |
let HD l = | |
match l with | |
CONS(a, b) -> a | |
| _ -> NIL;; | |
let TL l = | |
match l with | |
CONS(a, b) -> b | |
| _ -> NIL;; | |
let NULL l = | |
match l with | |
NIL -> (SYMBOL "TRUE") | |
| _ -> NIL;; | |
let ATOMP l = | |
match l with | |
(SYMBOL _) -> (SYMBOL "TRUE") | |
|(NUMBER _) -> (SYMBOL "TRUE") | |
| _ -> NIL;; | |
let SYMBOLP l = | |
match l with | |
(SYMBOL _) -> (SYMBOL "TRUE") | |
| _ -> NIL;; | |
let NUMBERP l = | |
match l with | |
(NUMBER _) -> (SYMBOL "TRUE") | |
| _ -> NIL;; | |
let LISTP l = | |
match l with | |
CONS(a, b) -> (SYMBOL "TRUE") | |
| NIL -> (SYMBOL "TRUE") | |
| _ -> NIL;; | |
let OPERATOR op k l = | |
let rec AUX l = | |
match l with | |
CONS((NUMBER a), b) -> op a (AUX (TL l)) | |
| _ -> k | |
in NUMBER (AUX l) ;; | |
let ADD l = OPERATOR ( + ) 0 l;; | |
let MULT l = OPERATOR ( * ) 1 l;; | |
let SUB l = OPERATOR ( - ) 0 l;; | |
let DIV l = OPERATOR ( / ) 1 l;; | |
let ZEROP l = | |
match l with | |
(NUMBER 0) -> (SYMBOL "TRUE") | |
|_ -> NIL;; | |
let rec APPEND l1 l2 = | |
match l1 with | |
CONS(a, b) -> CONS(a, APPEND b l2) | |
| NIL -> l2 | |
| _ -> NIL;; | |
let rec ZIP l1 l2 = | |
match (l1, l2) with | |
(CONS(x, xs), CONS(y, ys)) -> CONS(CONS(x, y), ZIP xs ys) | |
| _ -> NIL;; | |
let apply f l = | |
match f with | |
COMPUTATION g -> g.func l | |
| _ -> NIL;; | |
let rec applySnd f l = | |
match l with | |
CONS(CONS(n, v), r) -> CONS(CONS(n, f v), applySnd f r) | |
| _ -> NIL;; | |
let BHD X = HD(HD X);; | |
let BTL X = TL(HD X);; | |
let BCONS X = CONS(HD X, HD(TL X));; | |
let BNULL X = NULL(HD X);; | |
let BATOMP X = NULL(HD X);; | |
let BSYMBOLP X = SYMBOLP (HD X);; | |
let BNUMBERP X = NUMBERP(HD X);; | |
let BLISTP X = LISTP(HD X);; | |
let BEQUAL X = EQUAL(HD X) (HD (TL X));; | |
let BADD X = ADD X;; | |
let BMULT X = MULT X;; | |
let BSUB X = SUB X;; | |
let BDIV X = DIV X;; | |
let BZEROP X = ZEROP (HD X);; | |
let builtin = | |
[SYMBOL "TRUE", SYMBOL "TRUE"; | |
SYMBOL "FALSE", NIL; | |
SYMBOL "HD", COMPUTATION{func = BHD}; | |
SYMBOL "TL", COMPUTATION{func = BTL}; | |
SYMBOL "CONS", COMPUTATION{func = BCONS}; | |
SYMBOL "NULL", COMPUTATION{func = BNULL}; | |
SYMBOL "ATOMP", COMPUTATION{func = BATOMP}; | |
SYMBOL "SYMBOLP", COMPUTATION{func = BSYMBOLP}; | |
SYMBOL "NUMBERP", COMPUTATION{func = BNUMBERP}; | |
SYMBOL "LISTP", COMPUTATION{func = BLISTP}; | |
SYMBOL "EQUAL", COMPUTATION{func = BEQUAL}; | |
SYMBOL "ADD", COMPUTATION{func = BADD}; | |
SYMBOL "MULT", COMPUTATION{func = BMULT}; | |
SYMBOL "SUB", COMPUTATION{func = BSUB}; | |
SYMBOL "DIV", COMPUTATION{func = BDIV}; | |
SYMBOL "ZEROP", COMPUTATION{func = BZEROP}];; | |
let rec makeEnv l = | |
match l with | |
[] -> NIL | |
|(x1, x2)::xs -> CONS(CONS(x1, x2), makeEnv xs);; | |
let BASE = makeEnv builtin;; | |
(* add error routines so that errors in secial forms report error and | |
impose error propogation in eval *) | |
let ERROR1 x = CONS((SYMBOL "ERROR"), CONS(x, NIL));; | |
let ERROR2 name x = CONS((SYMBOL "ERROR"), CONS(CONS(SYMBOL name, x), NIL));; | |
let rec eval e x = | |
match x with | |
(SYMBOL s) -> lookup (SYMBOL s) e | |
|(NUMBER i) -> (NUMBER i) | |
|NIL -> NIL | |
|CONS((SYMBOL "IF"), b) -> match b with | |
CONS(cond, CONS(thenpart, CONS(elsepart, NIL))) -> | |
match (eval e cond) with | |
NIL -> (eval e elsepart) | |
| _ -> (eval e thenpart) | |
| _ -> ERROR2 "IF" b | |
|CONS((SYMBOL "LET"), b) -> match b with | |
CONS(bindings, CONS(body, NIL)) -> | |
let pairs = applySnd (eval e) bindings | |
in | |
eval (APPEND pairs e) body | |
| _ -> ERROR2 "LET" b | |
|CONS((SYMBOL "LAMBDA"), b) -> match b with | |
CONS(formals, CONS(body, NIL)) -> | |
COMPUTATION {func = (fun actuals -> let pairs = ZIP formals actuals | |
eval (APPEND pairs e) body)} | |
| _ -> ERROR2 "LAMBDA" b | |
|CONS((SYMBOL "REC"), b) -> match b with | |
CONS(F, CONS(formals, CONS(body, NIL))) -> | |
let rec f = | |
COMPUTATION {func = (fun actuals -> let pairs = ZIP formals actuals | |
eval (APPEND pairs (CONS(CONS(F, f), e))) body)} | |
f | |
| _ -> ERROR2 "REC" b | |
|CONS((SYMBOL "QUOTE"), CONS(X, NIL)) -> X | |
|CONS((SYMBOL "ENV"), NIL) -> e | |
|CONS((SYMBOL "EVAL"), CONS(X, NIL)) -> eval e (eval e X) | |
|CONS((SYMBOL "EVAL"), CONS(X, CONS(Y, NIL))) -> eval (eval e Y) (eval e X) | |
|CONS(a, b) -> apply (eval e a) (MAP (eval e) b) | |
| X -> ERROR1 X;; | |
(* tidy up *) | |
let exec str = eval BASE (parse str);; | |
(* tests | |
exec "(LET ((F . (LAMBDA (X) (MULT X X)))) (F 6))";; | |
exec "(LET ((F . (REC F (N) (IF (EQUAL N 0) 1 (MULT N (F (SUB N 1))))) )) (F 6))";; | |
exec "(LET ((LEN . (REC F (L) (IF (EQUAL L NIL) 0 (ADD 1 (F (TL L))) ) ) )) | |
(LEN (QUOTE (1 2 3))))";; | |
*) |
No comments :
Post a Comment