OCaml Bicicleta-language interpreter that can run factorial

Kragen Javier Sitaker kragen at pobox.com
Sat Mar 24 03:37:01 EDT 2007


The experience of running the REPL included here is a far cry from the
spreadsheet-like exploration/debugging UI I have in mind, but it at
least allows interactive testing of simple programs.  It should get a
lot better in the coming days.

Here's a sample interactive session:

    Beauty:~/devel/bicicleta kragen$ ./bicicleta_repl
    Bicicleta version 3, Copyright (C) 2007  Kragen Javier Sitaker
    Bicicleta comes with ABSOLUTELY NO WARRANTY; for details see the file
    "COPYING".
    This is free software, and you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    '()' = 2 + 3 * 4    # note: no operator precedence, rather deliberately
    "20"
    '()' = {x=1, y=2}.y  # define an object literal, call a method/read a property
    "2"
    '()' = {'yo mama' = "so fat", 'my mama' = "don't you talk bout my mama!"}.'yo mama'  # method names contain arbitrary characters if you quote them
    "so fat"
    '()' = -3
    parse error
    '()' = 3.negated  # no prefix operators in the language
    "-3"
    '()' = 3 < 4
    "true"
    '()' = 3 < 3
    "false"
    '()' = 3 <= 3
    "true"
    '()' = {self: x=1, y=self.x+2}.y   # you can define one property in terms of another
    "3"
    '()' = {self: x=1, y=self.x+2}{x=5}.y  # overriding the value of x in a derived object
    "7"
    '()' = {'()' = "hi"}.'()'  # method names with weird characters again, but
    "hi"
    '()' = {'()' = "hi"}()  # this one has syntactic sugar to call it
    "hi"
    '()' = {self: x=1, '()' = self.x+2}(x=5)  # including with overrides
    "7"
    '()' = {self: arg1=1, '()' = self.arg1+2}(5)  # positional args are arg1, arg2...
    "7"
    '()' = {fac: arg1 = 3, n=fac.arg1, '()' = (fac.n < 2).if_true(then = 1, else = fac.n * fac(fac.n - 1))}(5) # here's a recursive factorial.
    "120"
    '()' = {self: '[]' = {idx: 4, '()' = idx.arg1 * 2 + 1}}[45]  # '[]' is called by the foo[] syntax
    "91"
    '()' = {this: x=43, y=44, show={userdata=this}}  # try to display an object instead of a string or a number, and ...
    #16=#17=#14# {
    this: x = 43 (in
    (omitted 330 lines where it dumps the entire standard library)
    '()' = 3.5 / 3.1  # there is floating-point math; mixed-mode is still broken
    "1.12903225806"

There is a tar.gz file of all this source code, plus a little more, at
http://pobox.com/~kragen/sw/bicicleta-3.tar.gz for easier downloading
--- about 33 kilobytes.  There are already an earlier version at
http://pobox.com/~kragen/sw/bicicleta-1.tar.gz (and 0).  I'm sorry
it's such a huge email.

Unlike everything posted to kragen-hacks without a notice to the
contrary, this code is not in the public domain; I retain copyright,
but you can redistribute it and/or modify it under the terms of the
GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any
later version.

First, here's the build script that compiles the REPL and other
things.  I wrote it in sh because I'm on a Mac without make, and I
don't know where to download make.


#!/bin/sh
# Build script for prototype Bicicleta interpreter
set -ve
: ${OCAMLC=ocamlc} ${EXTRAS=}
$OCAMLC -c bicicleta_syntax.ml
ocamlyacc bicicleta_parser.mly
$OCAMLC -c bicicleta_parser.mli
$OCAMLC -c bicicleta_parser.ml
ocamllex bicicleta_lexer.mll
$OCAMLC -c bicicleta_lexer.ml
ocaml $EXTRAS bicicleta_syntax.cmo bicicleta_parser.cmo bicicleta_lexer.cmo \
    bicicleta.ml  # for regression tests
$OCAMLC -c bicicleta.ml
ocaml $EXTRAS bicicleta_syntax.cmo bicicleta_parser.cmo bicicleta_lexer.cmo \
    bicicleta.cmo bicicleta_lib.ml  # more regression tests
$OCAMLC -c bicicleta_lib.ml
$OCAMLC -c bicicleta_repl.ml
$OCAMLC bicicleta_syntax.cmo bicicleta_lexer.cmo bicicleta_parser.cmo \
    bicicleta.cmo bicicleta_lib.cmo bicicleta_repl.cmo -o bicicleta_repl
ocaml $EXTRAS bicicleta_syntax.cmo bicicleta_lexer.cmo bicicleta_parser.cmo \
    bicicleta.cmo bicicleta_lib.cmo bicicleta_dump.ml  # to see show_bicexpr go
$OCAMLC -c bicicleta_run_script.ml
$OCAMLC bicicleta_syntax.cmo bicicleta_lexer.cmo bicicleta_parser.cmo \
    bicicleta.cmo bicicleta_lib.cmo bicicleta_run_script.cmo \
    -o bicicleta_run_script


Here's bicicleta_syntax.ml, which defines the structure of Bicicleta
expressions and values:


(* Bicicleta language interpreter 
 Copyright (C) 2007  Kragen Javier Sitaker 

 This program is free software; you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 2 of the License, or 
 (at your option) any later version. 

 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 GNU General Public License for more details. 

 You should have received a copy of the GNU General Public License 
 along with this program; if not, write to the Free Software 
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *)
type methods = NoDefs
               | Definition of string * bicexpr * methods (* name, body, ... *)
and bicexpr = Name of string
              | Call of bicexpr * string
              | Literal of string option * methods
              | Derivation of bicexpr * string option * methods
              | StringConstant of string
              | Integer of int
              | Float of float
              | NativeMethod of (lookup -> bicobj)
and userdata = UserString of string
              | UserInteger of int 
              | UserFloat of float
and bicobj = ProtoObject 
    (* Five positional parameters is bad style; six is worse.
       Derive of name, selfname, body, env, next, cache *)
             | Derive of string * string option * bicexpr * 
                 lookup * bicobj * (string, bicobj) Hashtbl.t option ref
             | UserData of userdata
             | Error of string * string
and lookup = Phi | Add of string * bicobj * lookup ;;
(* other potential definition of lookup:
  type lookup ;;
  type add = { name: string; value: string; next: lookup; } ;;
  type lookup = Phi | Add of lookup ;; *)


It is used in the usual way by bicicleta_parser.mly, an ocamlyacc
file, to produce Bicicleta expressions from token streams:

/* -*- mode: tuareg; compile-command: "./build" -*- */
/* Bicicleta language interpreter 
 Copyright (C) 2007  Kragen Javier Sitaker 

 This program is free software; you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 2 of the License, or 
 (at your option) any later version. 

 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 GNU General Public License for more details. 

 You should have received a copy of the GNU General Public License 
 along with this program; if not, write to the Free Software 
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
%{
open Bicicleta_syntax;;

(* Reorder the definitions so the earliest ones are outermost; there's
no semantic reason for this, but changing it would require changing
the existing parse regression tests, and also show_bicexpr.  It's also
a convenient place to add names for positional arguments. *)
let mk_defs deflist = 
  let rec fd_defs argcount = function
      [] -> NoDefs
    | (Some name, expr) :: defs -> 
        Definition(name, expr, fd_defs argcount defs)
    | (None, expr) :: defs ->
        Definition("arg" ^ string_of_int argcount, expr,
                  fd_defs (argcount + 1) defs)
  in fd_defs 1 (List.rev deflist) ;;
%}
%token <string> NameTok
%token <string> StringTok
%token <string> OpTok
%token <string> IntTok
%token <string> FloatTok
%token Lparen Rparen Lbrace Rbrace Colon Period Newline Comma Equals Eof
%token Lsquare Rsquare
/* As written, this grammar has five ambiguities: is 1 + 2 + 3 supposed
   to be (1 + 2) + 3 or 1 + (2 + 3), and is 1 + x(3) supposed to be 
   1 + (x(3)) or (1 + x)(3) (with equivalent cases for 1+x[3], 1+x{3},
   and 1+x.y)?  The first one is resolved with %left OpTok below, and
   the others are resolved with the %nonassoc declaration, which gives
   those tokens higher precedence than OpTok by virtue of coming later
   in the source file. ocamlyacc -v is helpful for diagnosing this! */
%left OpTok
%nonassoc Period Lparen Lbrace Lsquare
%start main
%type <Bicicleta_syntax.bicexpr> main
%%

/* 22 productions --- a fairly small grammar.  If you try to write a
   grammar for Common Lisp, you end up with 16 productions to handle
   lists, dotted pairs, symbols, strings, integers, floats, ratios,
   chars, quote, quasiquote, unquote, unquote-splicing, and vectors,
   but that leaves out #=, ##, #*, #:, #|...|#, #+, #-, #., #a, #c,
   #b, #o, #x, #r, $p, #s, ",.", and user-defined read macros.  Also,
   some macros are pretty hairy; the CLHS entry for LOOP has a BNF
   grammar with 34 nonterminals. */

main:
    expr Eof { $1 }
expr:
    expr Period NameTok   { Call($1, $3) }
  | NameTok               { Name $1 }
  | StringTok             { StringConstant $1 }
  | literal               { Literal(fst $1, snd $1) }
  | expr literal          { Derivation($1, fst $2, snd $2) }
  | expr Lparen definitions Rparen { Call(Derivation($1, None, mk_defs $3),
                                         "()") }
  | expr Lsquare definitions Rsquare { Call(Derivation(Call($1, "[]"), None,
                                                          mk_defs $3), 
                                           "()") }
  | Lparen expr Rparen    { $2 }
  | IntTok                { Integer (int_of_string $1) }
  | FloatTok              { Float (float_of_string $1) }
  | expr OpTok expr       { Call(Derivation(Call($1, $2), None,
                                               mk_defs [None, $3]), "()") }
literal:
    Lbrace NameTok Colon definitions Rbrace { ((Some $2), mk_defs $4) }
  | Lbrace definitions Rbrace { (None, mk_defs $2) }
definitions:
    NameTok Equals expr   { [Some $1, $3] }
  | definitions def_separator NameTok Equals expr { (Some $3, $5) :: $1 }
  | expr { [None, $1] }
  | definitions def_separator expr { (None, $3) :: $1 }
  | /* empty */ { [] }
def_separator:
    Comma { () } | Comma Newline { () } | Newline { () }


The token streams, for their part, are produced by the lexer,
bicicleta_lexer.mll, which is written in ocamllex:

{ (* -*- mode: tuareg; compile-command: "./build" -*- *)
(* Bicicleta language interpreter 
 Copyright (C) 2007  Kragen Javier Sitaker 

 This program is free software; you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 2 of the License, or 
 (at your option) any later version. 

 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 GNU General Public License for more details. 

 You should have received a copy of the GNU General Public License 
 along with this program; if not, write to the Free Software 
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *)
    open Bicicleta_parser ;;
    let show_bictoken = function
        | NameTok name -> "NameTok " ^ name
        | StringTok string -> "StringTok " ^ string
        | OpTok op -> "OpTok " ^ op
        | IntTok num -> "IntTok " ^ num
        | FloatTok num -> "FloatTok " ^ num
        | Newline -> "Newline"
        | Lparen -> "(" | Rparen -> ")" | Lbrace -> "{" | Rbrace -> "}"
        | Colon -> ":"  | Period -> "." | Comma -> ","  | Equals -> "="
        | Lsquare -> "[" | Rsquare -> "]"
        | Eof -> "Eof" ;;
    (* this has to be in the standard library somewhere, right? *)
    (* the previous version of this code was arithmetic-heavy and
       bug-prone, but probably a lot faster.  Shorter too if you count
       list_of_string and string_of_list. *)
    let list_of_string string = 
      let rv = ref [] in 
        for i = String.length string - 1 downto 0 do 
          rv := string.[i] :: !rv 
        done ; 
        !rv ;;
    let rec string_of_list = function
        [] -> "" 
      | x :: y -> String.make 1 x ^ string_of_list y ;;
    exception ScrewedUpUnquotableList ;;
    let unquote_list = function x :: tail ->
      let rec unquote_tail accum = function
          [_] -> List.rev accum
        | '\\' :: x :: tail | x :: tail -> unquote_tail (x :: accum) tail
        | [] -> raise ScrewedUpUnquotableList
      in unquote_tail [] tail
      | [] -> raise ScrewedUpUnquotableList ;;
    let unquote s = string_of_list (unquote_list (list_of_string s)) ;;
}

let alus = ['A'-'Z' 'a'-'z' '_']
let alnumus = alus | ['0'-'9']
let ident = alus alnumus*

let quoted_name = "'" ([^ '\'' '\\'] | "\\'" | "\\\\" )* "'"

let string = '"' ([^ '"' '\\'] | "\\\"" | "\\\\")* '"'

(* see my rationals post: any sequence of these chars, except a single '=' *)
let non_eq_op_char = ['~' '`' '!' '@' '$' '%' '^' '&' '*' 
                      '-' '+' '<' '>' '?' '/' '|' '\\'] 
let op_char = non_eq_op_char | '='
let operator = non_eq_op_char | op_char op_char+

let integer = ['0'-'9']+
let float = ['0'-'9']* '.' ['0'-'9']+
(* let special = ['(' ')' '{' '}' ':' '.' '\n' ',' '='] *)
let lwsp = [' ' '\t']

let comment = '#' [^ '\n']* '\n'?
rule next =
   parse 
     lwsp { next lexbuf } (* skip whitespace; trick stolen from manual *)
   | ident as name { NameTok name }
   | quoted_name as name { NameTok (unquote name) }
   | string as str { StringTok (unquote str) }
   | operator as op { OpTok op }
   | integer as num { IntTok num }
   | float as num { FloatTok num }
   | '\n' { Newline } | comment { Newline }
   | '(' { Lparen } | ')' { Rparen } | '{' { Lbrace } | '}' { Rbrace } 
   | ':' { Colon }  | '.' { Period } | ',' { Comma }  | '=' { Equals }
   | '[' { Lsquare } | ']' { Rsquare }
   | eof { Eof }


The actual evaluation (and unit testing) is taken care of by this
file, bicicleta.ml:


(* -*- mode: tuareg; compile-command: "./build" -*- *)
(* translation of metacircular_bicicleta_interpreter into OCaml *)
(* Bicicleta language interpreter 
 Copyright (C) 2007  Kragen Javier Sitaker 

 This program is free software; you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 2 of the License, or 
 (at your option) any later version. 

 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 GNU General Public License for more details. 

 You should have received a copy of the GNU General Public License 
 along with this program; if not, write to the Free Software 
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *)
(* Because the types and the functions are all mutually recursive, I'm
   declaring them all (in bicicleta_syntax.ml), then putting pretty
   much all the code in a single let rec. *)
(* for interactive use:
#load "bicicleta_lexer.cmo" ;;
#load "bicicleta_parser.cmo" ;;
#load "bicicleta.cmo" ;;
open Bicicleta ;;
open Bicicleta_syntax ;;
*)
open Bicicleta_syntax ;;
let rec get gkey = function
    | Phi -> (Error("self-name not found", gkey))
    | Add(key, value, next) -> if gkey = key then value else get gkey next ;;

exception CantCreateCache ;;  (* can't happen *)
let sys_expr = Call(Name "prog", "sys") ;;
let (call_count: (string, int) Hashtbl.t) = Hashtbl.create 100 ;;
let get_call_count name = try Hashtbl.find call_count name 
  with Not_found -> 0 ;;
let rec eval env = function
    | Name name -> get name env
    | Call(object_, method_name) -> 
        Hashtbl.replace call_count method_name
          (get_call_count method_name + 1) ;
        let object_ = eval env object_
        in apply (objectget method_name object_) object_
    | Literal(self, methods) -> bind ProtoObject env self methods
    | Derivation(object_, self, methods) -> let object_ = eval env object_
        in bind object_ env self methods
    | Integer num ->
        wrap_userdata env (UserInteger num) "native_integer"
    | Float num ->
        wrap_userdata env (UserFloat num) "native_float"
    | StringConstant string ->
        wrap_userdata env (UserString string) "native_string"
    | NativeMethod f -> f env
  and wrap_userdata env datum typename = 
    let userdata = UserData datum
    in eval env (Derivation(Call(sys_expr, typename), None,
        Definition("userdata", NativeMethod (fun _ -> userdata), NoDefs)))
  and bind base env self = function
    | NoDefs -> base
    | Definition(name, body, next) -> 
        bind (derive name self body env base) env self next
  and objectget key = function
    | ProtoObject -> (Error("method not found", key))
    | Derive(name, _, _, _, next, _) as m -> 
        if key = name then m else objectget key next
    | Error(_, _) as obj -> obj
    | UserData _ -> (Error("userdata has no methods", key))
  and derive name self body env o = 
    (Derive(name, self, body, env, o, ref None))
  and apply method_ self = match method_ with
    | Derive(name, methodself, body, env, _, _) -> 
        callmethod name (methodself, body, env) self
    | Error(_, _) as err -> err (* OK for now... *)
    | _ -> (Error("trying to apply non-method", ""))
  and callmethod name (selfname, body, env) self =
  match self with
      Derive(_, _, _, _, _, maybecache) -> 
        (match !maybecache with None -> maybecache := Some (Hashtbl.create 7)
          | Some _ -> ());
        (match !maybecache with None -> raise CantCreateCache
          | Some cache ->
              try Hashtbl.find cache name
                with Not_found -> let result = 
                  callmethodfull (selfname, body, env) self
                  in Hashtbl.replace cache name result;
                  result)
    | _ -> callmethodfull (selfname, body, env) self
  and callmethodfull (selfname, body, env) self =
    (match selfname with 
        Some name -> eval (Add(name, self, env)) body 
      | None -> eval env body)
;;

(* for debugging: deparse an expression. Could be considered a
   specification of a subset of the grammar, but does not (yet)
   exercise the following:
   - commas
   - the x{...}.'()' as x(...) syntactic sugar
   - the x.'*'(y) as x * y syntactic sugar
   - the x{arg1=a, arg2=b, arg3=c} as x{a, b, c} syntactic sugar
*)

let is_identifier string =
    let is_id_start_char ch = let code = Char.code ch in
        (code >= Char.code 'A' && code <= Char.code 'Z') ||
        (code >= Char.code 'a' && code <= Char.code 'z') || ch = '_'
    in let is_id_char ch = is_id_start_char ch || let code = Char.code ch in
        (code >= Char.code '0' && code <= Char.code '9')
    in
    let rec test string ii =
        if ii = String.length string then true
        else is_id_char string.[ii] && test string (ii + 1)
    in String.length string > 0 && is_id_start_char string.[0] &&
        test string 1 ;;
let escstr quote string =
  let rec esclist quote = function
      [] -> []
    | '\\' :: cs -> '\\' :: '\\' :: esclist quote cs
    | c :: cs when c = quote -> '\\' :: c :: esclist quote cs
    | c :: cs -> c :: esclist quote cs
  in Bicicleta_lexer.string_of_list 
    (esclist quote (Bicicleta_lexer.list_of_string string));;
let escname name = 
  if is_identifier name then name
  else "'" ^ escstr '\'' name ^ "'" ;;
let rec show_bicexpr_i indent = function
  | Name name -> escname name
  | StringConstant string -> "\"" ^ escstr '"' string ^ "\""
  | Derivation(object_, self, methods) -> 
      show_bicexpr_i indent object_ 
      ^ show_bicexpr_i indent (Literal(self, methods))
  | Literal(Some self, methods) ->
      "{" ^ escname self ^ ": " ^ outer_show_methods indent methods ^ "}"
  | Literal(None, methods) ->
      "{" ^ outer_show_methods indent methods ^ "}"
  | Call(object_, method_name) -> 
      show_bicexpr_i indent object_ ^ "." ^ escname method_name 
  | Integer n -> string_of_int n
  | Float n -> string_of_float n
  | NativeMethod _ -> "(;native method;)"
and outer_show_methods indent = function
  | Definition(_, _, Definition(_, _, _)) as m -> 
      let newindent = indent ^ "  " in
        "\n" ^ newindent ^ show_methods newindent m ^ "\n" ^ indent
  | m -> show_methods indent m
and show_methods indent = function
  | NoDefs -> ""
  | Definition(name, body, NoDefs) -> show_method indent name body
  | Definition(name, body, next) ->
      show_method indent name body ^ "\n" ^ indent ^ show_methods indent next
and show_method indent name body = 
  escname name ^ " = " ^ (show_bicexpr_i indent body) ;;
let show_bicexpr = show_bicexpr_i "" ;;

(* show_bicobj: mostly for error reporting, also used in the REPL.
   Doesn't work well, but that's not surprising if you look at the
   code.  The idea is that the "labels" list is supposed to keep us
   from printing out the same thing more than once. *)
exception ShowBicobjIsBroken;;
let rec _show_bicenv level labels = function 
    Phi -> ""
  | Add(name, obj, rest) ->
      String.make level ' ' ^ name ^ ": " 
      ^ __show_bicobj (level+2) labels obj ^ ";\n" 
      ^ _show_bicenv level labels rest
and show_selfname = function None -> "" | Some selfname -> selfname ^ ": "
and _show_bicobj level labels = function
    UserData (UserInteger x) -> string_of_int x
  | UserData (UserString x) -> "\"" ^ x ^ "\""
  | UserData (UserFloat x) -> string_of_float x
  | Error (a, b) -> "Error(\"" ^ a ^ "\", \"" ^ b ^ "\")"
  | Derive (name, selfname, body, env, next, _) ->
      __show_bicobj level labels next ^ " {\n" 
      ^ String.make level ' ' ^ show_selfname selfname ^ name 
      ^ " = " ^ show_bicexpr body ^ " (in\n" 
      ^ _show_bicenv level labels env ^ ")}" 
  | ProtoObject -> "{}" 
and findq obj = function
  | [] -> None
  | x :: y -> if obj == x then Some (List.length y) else findq obj y
and labelfor labels obj = 
  match findq obj !labels with 
      None -> raise ShowBicobjIsBroken 
    | Some label -> "#" ^ string_of_int label ^ "="
and __show_bicobj level labels obj = 
  match findq obj !labels with
      None -> 
        labels := obj :: !labels; 
        labelfor labels obj ^ _show_bicobj level labels obj
    | Some label -> "#" ^ string_of_int label ^ "#"
;;
let show_bicobj x = _show_bicobj 0 (ref []) x ;;

(* for unit tests and interactive testing *)
let tokenize_ next string = let buf = Lexing.from_string string 
  in let rec get_toks toks = 
    let tok = next buf in
      match tok with 
          Bicicleta_parser.Eof -> List.rev toks
        | _ -> get_toks (tok :: toks)
  in get_toks [] ;;
let tokenize = tokenize_ Bicicleta_lexer.next ;;  (* for unit tests *)

(* This ugly, hairy piece of rot is supposed to simplify the parser by
   keeping the poor widdle parser from having to cope with extra
   newlines in random places.  See, we want to use newlines to
   separate method definitions, but we want to ignore them the rest of
   the time.  So the obvious thing to do is to remove newlines that
   aren't strictly needed to support method definitions in a
   post-processing stage for the lexer.  So in between the lexer (32
   lines of ocamllex at present) and the parser (50 lines of
   ocamlyacc) we interpose this 42-line monstrosity.  It seemed like a
   good idea when Meredith Patterson suggested it, but she didn't know
   I was using an ocamllex-generated parser and would therefore have
   to do this in an intermediate step... *)

exception EmptyWindowCantHappen;;
let drop_unnecessary_newlines lexer_next =
  let (window : Bicicleta_parser.token list ref) = ref [] 
  and last_token = ref None 
  in let next lexbuf =
    let rec really_read_token tok = 
      window := !window @ [tok] ; last_token := Some tok; main_switch ()
    and read_token () = 
      let tok = lexer_next lexbuf
      in match !last_token with 
          None -> really_read_token tok
        | Some Bicicleta_parser.Newline -> (match tok with
              Bicicleta_parser.Newline -> read_token ()
            | _ -> really_read_token tok)
        | Some _ -> really_read_token tok
    and output_token () = match !window with
        [] -> raise EmptyWindowCantHappen
      | x :: y -> window := y; x
    and discard_newline () = match !window with 
        [] -> raise EmptyWindowCantHappen
      | x :: y -> window := y ; main_switch ()
    and main_switch () = match !window with
        [] -> read_token ()
      | [Bicicleta_parser.Newline] -> read_token ()
      | [_] -> output_token ()
      | [Bicicleta_parser.Newline; Bicicleta_parser.NameTok _] ->
          read_token ()
      | [Bicicleta_parser.Newline; _] -> discard_newline ()
      | [Bicicleta_parser.Newline; Bicicleta_parser.NameTok _;
         Bicicleta_parser.Equals] -> output_token ()
      | [Bicicleta_parser.Newline; Bicicleta_parser.NameTok _;
         Bicicleta_parser.Newline] -> read_token ()
      | [Bicicleta_parser.Newline; Bicicleta_parser.NameTok _;
         _] -> discard_newline ()
      | [Bicicleta_parser.Newline; Bicicleta_parser.NameTok _;
         Bicicleta_parser.Newline; Bicicleta_parser.Equals] ->
          output_token ()
      | [Bicicleta_parser.Newline; Bicicleta_parser.NameTok _;
         Bicicleta_parser.Newline; _] -> discard_newline ()
      | _ :: _ -> output_token ()
    in main_switch () 
  in next;;

let parse string = Bicicleta_parser.main 
  (drop_unnecessary_newlines Bicicleta_lexer.next)
  (Lexing.from_string string) ;;
let unquote = Bicicleta_lexer.unquote ;;

(* free variable computation, for error detection. *)
module StringSet = Set.Make(String) ;;
let rec stringset = function
    [] -> StringSet.empty
  | s :: rest -> StringSet.add s (stringset rest) ;;

let rec freevars = function
    Name n -> stringset [n]
  | Integer _ | StringConstant _ | Float _ -> stringset ["prog"]
  | NativeMethod _ -> stringset []
  | Literal (Some selfname, methods) -> 
        StringSet.diff (freevars_methods methods) (stringset [selfname])
  | Literal (None, methods) -> freevars_methods methods
  | Derivation(object_, self, methods) ->
        StringSet.union (freevars object_) (freevars (Literal(self, methods)))
  | Call(object_, _) -> freevars object_
 and freevars_methods = function
    NoDefs -> stringset []
  | Definition (name, body, rest) ->
        StringSet.union (freevars body) (freevars_methods rest) ;;
let freevars_list expr = StringSet.elements (freevars expr) ;;

(* What method names are implemented in some piece of source? *)
let rec implemented = function
    Name _ | NativeMethod _ -> stringset []
  | Integer _ | StringConstant _ | Float _ -> stringset ["userdata"]
  | Derivation(object_, _, methods) ->
      StringSet.union (implemented object_) (implemented_methods methods)
  | Literal(_, methods) -> implemented_methods methods
  | Call(object_, _) -> implemented object_
and implemented_methods = function
    NoDefs -> stringset []
  | Definition(name, body, rest) -> 
      StringSet.union (stringset [name])
        (StringSet.union (implemented body) (implemented_methods rest))
;;
let implemented_list expr = StringSet.elements (implemented expr) ;;

(* What method names are called in some piece of source? *)
let rec called = function
    Name _ | NativeMethod _ -> stringset []
  | Integer _ -> stringset ["sys"; "native_integer"]
  | StringConstant _ -> stringset ["sys"; "native_string"]
  | Float _ -> stringset ["sys"; "native_float"]
  | Derivation(object_, _, methods) ->
      StringSet.union (called object_) (called_methods methods)
  | Literal(_, methods) -> called_methods methods
  | Call(object_, method_name) ->
      StringSet.union (called object_) (stringset [method_name])
and called_methods = function
    NoDefs -> stringset []
  | Definition(_, body, rest) ->
      StringSet.union (called body) (called_methods rest)
;;
let called_list expr = StringSet.elements (called expr) ;;

(* Looking at the output from this routine has caught a number of bugs. *)
let lint expr =
  let callees = called expr and implementees = implemented expr
  in ["only called", 
         StringSet.elements (StringSet.diff callees implementees);
     "only implemented", 
         StringSet.elements (StringSet.diff implementees callees);
     "free variables", freevars_list expr] ;;

let unit_tests () =
    assert (is_identifier "foo") ;
    assert (is_identifier "x") ;
    assert (is_identifier "x3") ;
    assert (not (is_identifier "3")) ;
    assert (is_identifier "x_y") ;
    assert (is_identifier "_x") ;
    assert (not (is_identifier "()")) ;
    assert (not (is_identifier "(3")) ;
    assert (not (is_identifier "")) ;

    assert ((escname "hips") = "hips") ;
    assert ((escname "()") = "'()'") ;
    assert ((escname "don't") = "'don\\'t'") ;
    assert ((escname "\\") = "'\\\\'") ;

    (* environment lookup *)
    let ustr x = UserData (UserString x)
    in 
    assert (get "foo" (Add("foo", ustr "bar", Phi)) = ustr "bar") ;
    assert ((get "foo" Phi) = Error("self-name not found", "foo")) ;
    assert ((get "foo" (Add("bar", ustr "baz", Phi))) = 
        Error("self-name not found", "foo")) ;

    (* evaluation tests without parsing *)
    let string_value_is expr str = 
      (eval Phi (Call(expr, "userdata"))) = ustr str
    in
    assert (string_value_is (StringConstant "foo") "foo") ;
    assert (string_value_is (Call(Literal(Some "self", 
               Definition("foo", StringConstant("quux"), NoDefs)), "foo"))
           "quux") ;
    assert ((eval Phi (Call(Literal(Some "self", 
           Definition("foo", StringConstant("quux"), NoDefs)), "baz"))) 
           = Error("method not found", "baz")) ;
    (* note that this expression is very similar to the definition of
       prog.sys.bool in bicicleta_lib.ml.  That's because it's an earlier
       version. *)
    let booleans = Literal(Some "booleans", 
        Definition("true", Literal(Some "boolean", 
            Definition("if_true", Literal(Some "self", 
                Definition("()", Call(Name "self", "then"),
                Definition("then", StringConstant("no consequent given"),
                Definition("else", StringConstant("no alternate given"),
                NoDefs)))),
            Definition("negated", Call(Name "booleans", "false"),
            Definition("if_false", Call(Call(Name "boolean", "negated"), 
                                                              "if_true"),
            NoDefs)))),
        Definition("false", Derivation(Call(Name "booleans", "true"), 
            Some "boolean",
            Definition("if_true", Derivation(Call(Call(Name "booleans", "true"),
                "if_true"), Some "self",
                Definition("()", Call(Name "self", "else"), 
                NoDefs)),
            Definition("negated", Call(Name "booleans", "true"),
            NoDefs))),
        NoDefs)))
    in

    (* print_endline (show_bicexpr booleans) ; *)
    assert (string_value_is(Call(Call(Call(booleans, "true"), 
                                     "if_true"), "()"))
                           "no consequent given") ;
    assert (string_value_is(Call(Derivation(Call(Call(booleans, "true"), 
                                                "if_true"), None,
             Definition("then", StringConstant("is true"),
             Definition("else", StringConstant("is false"),
             NoDefs))),
           "()")) 
           "is true") ;
    assert (string_value_is (Call(Derivation(Call(Call(booleans, "false"),
                                                "if_true"), None,
             Definition("then", StringConstant("is true"),
             Definition("else", StringConstant("is false"),
             NoDefs))),
           "()")) 
           "is false") ;

    (* unquoting *)
    assert((unquote "'foo'") = "foo");
    assert((unquote "\"foo\"") = "foo");
    assert((unquote "'hasn\\'t'") = "hasn't");
    assert((unquote "\"\\\"no\\\" \"") = "\"no\" ");
    assert((unquote "\"\\\"no\\\"\"") = "\"no\"");

    (* tokenizing *)
    (* wish I could say 
       'from Bicicleta_parser import Lbrace, Rbrace, NameTok, Colon, Equals',
       but I really don't want to open Bicicleta_parser;; here. *)

    let lbrace = Bicicleta_parser.Lbrace and rbrace = Bicicleta_parser.Rbrace
        and nametok n = Bicicleta_parser.NameTok n 
        and colon = Bicicleta_parser.Colon and equals = Bicicleta_parser.Equals
        and optok o = Bicicleta_parser.OpTok o 
        and period = Bicicleta_parser.Period 
        and inttok n = Bicicleta_parser.IntTok n 
        and lparen = Bicicleta_parser.Lparen
        and rparen = Bicicleta_parser.Rparen and comma = Bicicleta_parser.Comma
        and newline = Bicicleta_parser.Newline
        and stringtok x = Bicicleta_parser.StringTok x
    in
    assert((tokenize "a") = [nametok "a"]);
    assert((tokenize "a.b") = [nametok "a"; period; nametok "b"]);
    assert((tokenize "aa+bb") = [nametok "aa"; optok "+"; nametok "bb"]);
    assert((tokenize "{xx: }") = [lbrace; nametok "xx"; colon; rbrace]);
    assert((tokenize "'()' * (2**32)") = [nametok "()"; optok "*"; lparen;
                                          inttok "2"; optok "**"; inttok "32";
                                          rparen]);
    assert((tokenize "\"foo\".length") = [stringtok "foo"; period; 
                                          nametok "length"]);
    assert((tokenize "(){}:.,") = [lparen; rparen; lbrace; rbrace; colon; 
                                   period; comma]);
    assert((tokenize "~`!@$%^&*-+=<>?/|\\") = [optok "~`!@$%^&*-+=<>?/|\\"]);
    assert((tokenize "{x:y=x}") = [lbrace; nametok "x"; colon; nametok "y";
                                   equals; nametok "x"; rbrace]);
    assert((tokenize "foo { foo : \n x = foo. \tbletch\ny=foo.x\n}\n")
           = [nametok "foo"; lbrace; nametok "foo"; colon; newline; 
              nametok "x"; equals; nametok "foo"; period; nametok "bletch";
              newline;
              nametok "y"; equals; nametok "foo"; period; nametok "x";
              newline; rbrace; newline]);
    assert((tokenize "x[3]") = [nametok "x"; Bicicleta_parser.Lsquare; 
                                inttok "3"; Bicicleta_parser.Rsquare]);

    (* parsing *)
    assert((parse "x") = Name "x");
    assert((parse "'()'") = Name "()");
    assert((parse "foo.bar.baz") = Call(Call(Name "foo", "bar"), "baz"));
    assert((parse "\"foo\".length") = Call(StringConstant "foo", "length"));
    assert((parse "{x:y=x}") 
          = Literal(Some "x", Definition("y", Name "x", NoDefs)));
    assert((parse "{x:}") = Literal(Some "x", NoDefs));
    assert((parse "{xx:y=xx,z=xx}") 
          = Literal(Some "xx", Definition("y", Name "xx",
                  Definition("z", Name "xx", NoDefs))));
    assert((parse "{x: x = \"x\", y = \"y\", z = \"z\"}") = Literal(Some "x",
                  Definition("x", StringConstant("x"),
                  Definition("y", StringConstant("y"),
                  Definition("z", StringConstant("z"), NoDefs)))));
    assert((parse "point {p: x = \"x\", y = \"y\"}") = Derivation(Name "point",
        Some "p", Definition("x", StringConstant("x"),
                  Definition("y", StringConstant("y"), NoDefs))));
    assert((parse "(a.x)") = Call(Name "a", "x"));             
    assert((parse "(((a).x))") = Call(Name "a", "x"));
    assert((parse "{x: y=x,\n  z=x}") = Literal(Some "x", 
        Definition("y", Name "x",
        Definition("z", Name "x", NoDefs))));
    assert((parse "{x: y=x,\n\n  z=x}") = Literal(Some "x", 
        Definition("y", Name "x",
        Definition("z", Name "x", NoDefs))));
    assert((parse "x\n") = Name "x");
    (* support end-line comments at EOF *)
    assert((parse "x # a variable!") = (parse "x"));
    (* separate methods with newlines: *)
    assert((parse "{x:y=x\nz=x}") = Literal(Some "x",        
        Definition("y", Name "x",
        Definition("z", Name "x", NoDefs))));
    (* allow newlines around period *)
    assert((parse "x\n.\ny \n . \n z") = Call(Call(Name "x", "y"), "z"));
    (* allow newlines in parens *)
    assert((parse "(\nx\n)") = Name "x");
    (* allow leading newlines *)
    assert((parse "\nx") = Name "x");
    (* allow newlines between prototype and overrides *)
    assert((parse "x{y: z = w}") = (parse "x\n{y: z = w}"));
    (* allow newlines around self-name *)
    assert((parse "x {y: z = w}") = (parse "x {\ny\n:z=w}"));
    (* allow newlines around equals sign *)
    assert((parse "x{y:z=w}") = (parse "x{y:z\n=\nw}"));
    assert((parse "x{y:z=w,v=u}") = (parse "x{y:z\n=\nw\nv\n=\nu}"));
    (* treat end-line comments as newlines *)
    assert((parse "{a=b # b!\n c=d}") = parse("{a=b, c=d}"));

    (* various kinds of syntactic sugar: *)
    (* omitting self-names *)
    assert((parse "x { z = w }") 
           = Derivation(Name "x", None, Definition("z", Name "w", NoDefs)));
    assert((parse "{x = 1, y = 2}") = Literal(None,
        Definition("x", Integer 1, Definition("y", Integer 2, NoDefs))));
    (* parenthesized arguments *)
    assert((parse "x(verbose = 1)") = (parse "x { verbose = 1 }.'()'"));
    assert((parse "{env: fac = {fac: x = 3, 
            '()' = fac.x.'<'(arg1=2).if_true(then=1, 
                else=fac.x.'*'(arg1=env.fac(x=fac.x.'-'(arg1=1))))}}.fac(x = 4)
        ") = (parse "{env: fac = {fac: x = 3,
            '()' = fac.x.'<'{arg1=2}.'()'.if_true{then=1, 
                 else=fac.x.'*'{arg1=env.fac{
                     x=fac.x.'-'{arg1=1}.'()'}.'()'}.'()'}.'()'}
            }.fac{x = 4}.'()'")) ;

    (* positional arguments *)
    assert((parse "x(3)") = (parse "x(arg1 = 3)"));
    assert((parse "x{3}") = (parse "x{arg1 = 3}"));
    assert((parse "x{y: 3}") = (parse "x{y: arg1 = 3}"));
    assert((parse "x(37, quiet=true)") = (parse "x(arg1 = 37, quiet = true)"));
    assert((parse "x(37, 38)") = (parse "x(arg1 = 37, arg2 = 38)"));

    (* infix *)
    assert((parse "3 + 4") = (parse "3.'+'(4)"));
    assert((parse "1 + 2 + 3") = (parse "1.'+'(2).'+'(3)"));

    (* indexing notation *)
    assert((parse "x[3]") = (parse "x.'[]'(3)"));

    (* combination of infix with other things *)
    assert((parse "1 + x(3)") = (parse "1 + (x(3))"));
    assert((parse "1 + x[3]") = (parse "1 + (x[3])"));
    assert((parse "1 + x{3}") = (parse "1 + (x{3})"));

    (* just a check to see if our parser doesn't crash. *)
    ignore(parse "prog.sys.normal_commutative_number {self:
        clientdata = 5.clientdata
        show = self.clientdata.show
        as_integer = self
        as_float = self.clientdata.as_float
        coerce = {op: arg1 = 2, '()' = op.arg1.as_integer}
        add = {op: arg1 = 3, '()' = self.clientdata.add(op.arg1.clientdata)}
        negated = self.clientdata.negated
        multiply = self.add {op:
            '()' = self.clientdata.multiply(op.arg1.clientdata)
        }
        modulo = {op: arg1 = 2
            '()' = self.clientdata.divmod(op.arg1.clientdata).mod
        }
        # As Jamie McCarthy points out, rationals made out of machine
        # integers are really pretty limited... but for now I'm using them
        # anyway.
        divide = {op: arg1 = 2, '()' = prog.sys.rational.new(self, op.arg1)}
        reciprocal = 1 / self
        gcd = self.binop {op: arg1 = 5
            '()' = (arg1 == 0).if_true(then = self, 
                else = op.arg1.gcd(self % op.arg1))
        }
        less_than = self.add {op:
            '()' = self.clientdata.less_than(op.arg1.clientdata)
        }
    }");

    (* ensure newlines don't separate positional parameters *)
    assert((parse "x{{}\n{}}") = (parse "x{{}{}}"));

    (* more evaluation tests now that we know parsing works *)
    assert((eval Phi (parse "3.userdata")) = UserData (UserInteger 3));
    assert((eval Phi (parse "3.5.userdata")) = UserData (UserFloat 3.5));
    assert(string_value_is (parse "{}{\n\tx = \"foo\"\n\ty=\"bar\"}.x") "foo");
    assert(string_value_is (parse "{\n\tx = \"foo\"\n\ty=\"bar\"}.y") "bar");

    (* evaluation with native method *)
    let native_add = NativeMethod (fun env -> 
      let x = eval env (Call(Name "method", "arg1"))
      and y = eval env (Call(Name "method", "arg2"))
      in match (x, y) with
          (UserData (UserInteger xs), UserData (UserInteger ys)) -> 
            UserData (UserInteger (xs + ys))
              (* saving the invalid args is a pain, so we don't bother *)
        | _ -> Error ("invalid addition args", "??"))
    in let intrinsics = derive "integer_add" (Some "method") native_add Phi
                              ProtoObject
    in let addition = parse "intrinsics { 3.userdata, 4.userdata }.integer_add"
    in let seven = eval (Add("intrinsics", intrinsics, Phi)) addition
    in 
    assert(seven = UserData (UserInteger 7));
      (* That the above works is a little strange.  If we just eval
         "3" in an environment without "prog", we get an evaluation
         error saying it needs "prog"; and likewise if we eval
         "3.beanstalk"; but if we eval "3.userdata", we get the right
         thing.  That's because the "3" is actually inheriting from an
         error object --- but that works!  As long as you don't try to
         call any methods you were hoping the error object would be
         defining, but only methods you put there. *)

    (* showing expressions *)
    assert((parse (show_bicexpr booleans)) = booleans);
    assert((show_bicexpr (parse "{x = 1}")) = "{x = 1}");
    assert((show_bicexpr (parse "{'doesn\\'t' = 1}")) = "{'doesn\\'t' = 1}");
    assert((show_bicexpr (parse "{foo = \"bar\"}")) = "{foo = \"bar\"}");
    assert((show_bicexpr (parse "{foo = \"ba\\\"r\"}")) = 
        "{foo = \"ba\\\"r\"}");
    assert((show_bicexpr (parse "3.5")) = "3.5");

    (* computing free variables *)
    let freevars_are expr vars = 
        StringSet.equal (freevars(parse(expr))) (stringset vars)
    in
    assert(freevars_are "x" ["x"]) ;
    assert(freevars_are "1" ["prog"]) ;
    assert(freevars_are "1.2" ["prog"]) ;
    assert(freevars_are "\"foo\"" ["prog"]) ;
    assert(freevars_are "{x: y=z, w=x, t=uv}" ["z"; "uv"]) ;
    assert(freevars_are "x{y: z=w, zz=x, zzz=y}" ["w"; "x"]) ;
    assert(freevars_are "x.y" ["x"]) ;
    (* this code contains no free variables, ... *)
    assert(freevars_are "{env: fac = {fac: x = 3,
    '()' = fac.x.'<'{lt: arg1=2}.'()'.if_true{i: then=1, 
         else=fac.x.'*'{mu: arg1=env.fac{f:
             x=fac.x.'-'{m: arg1=1}.'()'}.'()'}.'()'}.'()'}
    }.fac{f: x = 4}.'()'" ["prog"]) ;
    (* but this buggy version did: *)
    assert(freevars_are "{env: fac = {fac: x = 3,
    '()' = x.'<'{lt: arg1=2}.'()'.if_true{i: then=1, 
         else=fac.x.'*'{mu: arg1=env.fac{f:
             x=fac.x.'-'{m: arg1=1}.'()'}.'()'}.'()'}.'()'}
    }.fac{f: x = 4}.'()'" ["x"; "prog"]) ;

    assert(freevars_are "{}" []);
    assert(freevars_are "{x=y}" ["y"]);
    assert(freevars_are "x {y = z}" ["x"; "z"]);

    (* computing lists of method names for linting *)
    let implemented_are expr vars =
      StringSet.equal (implemented(parse(expr))) (stringset vars)
    in
    assert(implemented_are "x" []);
    assert(implemented_are "3" ["userdata"]);
    assert(implemented_are "3.5" ["userdata"]);
    assert(implemented_are "\"foo\"" ["userdata"]);
    (* not really a good way to tell here... *)
    assert(StringSet.equal (implemented (NativeMethod (fun _ -> ProtoObject)))
            (stringset []));
    assert(implemented_are "{}" []);
    assert(implemented_are "{a=b}" ["a"]);
    assert(implemented_are "{a=b, c=d}" ["a"; "c"]);
    assert(implemented_are "{a=b}{c=d}" ["a"; "c"]);
    assert(implemented_are "{a=b}.c.d" ["a"]);
    assert(implemented_are "{a={b={c=d}.e}.f}" ["a"; "b"; "c"]);

    let called_are expr vars =
      StringSet.equal (called(parse(expr))) (stringset vars)
    in
    assert(called_are "x" []);
    assert(called_are "3" ["sys"; "native_integer"]);
    assert(called_are "3.5" ["sys"; "native_float"]);
    assert(called_are "\"foo\"" ["sys"; "native_string"]);
    (* there is no way to tell here: *)
    assert(StringSet.equal (called (NativeMethod (fun _ -> ProtoObject)))
            (stringset []));
    assert(called_are "b.c" ["c"]);
    assert(called_are "b.c.d.e.f" ["c"; "d"; "e"; "f"]);
    assert(called_are "{}" []);
    assert(called_are "{a=b.c.d}" ["c"; "d"]);
    assert(called_are "{a=b.c.d, e=f.g.h}" ["c"; "d"; "g"; "h"]);
    assert(called_are "{a=b.c.d, e=f.g.h}" ["c"; "d"; "g"; "h"]);
    assert(called_are "{a=b.c.d, e=f.g.h}{i=j.k}.mnop"
            ["c"; "d"; "g"; "h"; "k"; "mnop"]);
    assert(called_are "{a={b={c=d}.e}.f}" ["e"; "f"]);
;;
unit_tests();;



This file, bicicleta_lib.ml, contains some native functions and the
beginnings of a standard library.

(* -*- mode: tuareg; compile-command: "./build" -*- *)
(* The beginnings of a standard library, with some basic native methods. *)
(* Bicicleta language interpreter 
 Copyright (C) 2007  Kragen Javier Sitaker 

 This program is free software; you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 2 of the License, or 
 (at your option) any later version. 

 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 GNU General Public License for more details. 

 You should have received a copy of the GNU General Public License 
 along with this program; if not, write to the Free Software 
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *)
(* for interactive use:
#load "bicicleta_lexer.cmo" ;;
#load "bicicleta_parser.cmo" ;;
#load "bicicleta.cmo" ;;
#load "bicicleta_lib.cmo" ;;
open Bicicleta ;;
open Bicicleta_syntax ;;
open Bicicleta_lib ;;
*)
open Bicicleta_syntax ;;
open Bicicleta ;;

let prog_sys_source = "{prog: sys = {
object = {self:
        # One day, things will inherit from this implicitly if not 
        # otherwise specified.
        # These two methods are intended to be defined differently on errors.
        '!!' = {op: '()' = self}
        is_ok = prog.sys.bool.true
        # But not this one.
        '->' = {op: arg1 = self, key = self, value = op.arg1}
    }
bool =
    {bool: 
        true = prog.sys.object {self: 
            if_true = {if:
                '()' = if.then
                then = prog.error(\"no consequent given\")
                else = prog.error(\"no alternate given\")
            }
            not = bool.false
            if_false = self.not.if_true
            '||' = {or: '()' = self}
            '&&' = {and: arg1 = self, '()' = and.arg1}
            show = \"true\"
        }
        false = bool.true {self:
            if_true = bool.true.if_true{if: '()' = if.else}
            not = bool.true
            '||' = {or: arg1 = self, '()' = or.arg1}
            '&&' = {and: '()' = self}
            show = \"false\"
        }
    }

# prog.sys.normal_commutative_number:

# This is a base class for number-like things that support the usual
# commutative numerical operations, such as native integers, integers
# in Z_n, floating-point numbers, rational numbers, vectors,
# polynomials, rational expressions, continued fractions, complex
# numbers, sampled audio signals, dates, time intervals, images,
# multidimensional arrays, and so on.

# Here's a table of the levels of support you can provide to the
# methods herein.

# If you implement:     You get proper support for:
# add                   +
# add, negated          +, negated, -
# multiply              *
# multiply, reciprocal  *, reciprocal, /
# power                 **
# modulo                %
# less_than             <, ==, >, <=, >=, !=, between
# less_than, coerce(0)  <, ==, >, <=, >=, !=, between, abs, negative, positive

# You can also override 'subtract', 'divide', 'equals', and
# 'greaterthan' to get the expected effects; there are default
# implementations built from the above six methods.  If you do that,
# you may want to override 'reverse -' and 'reverse /' as well.

# If you override the 'coerce' method to convert objects to some type
# your arithmetic methods know how to support, it will convert them to
# the right type ahead of time.

# The 'coerce' method need not return objects supporting the same
# protocol as your own, although they need to support 'negated' if you
# want to subtract them, and they need to support 'reciprocal' if you
# want to divide by them.  So the 'coerce' method for a date object
# might coerce things to time intervals and then support + and - with
# them.  By the same token, your arithmetic methods might return
# objects of some other type --- but remember that 'negated' might get
# fed to 'add', and 'reciprocal' might get fed to 'multiply'.

# In order to support arithmetic on either side of the operator, the
# operator methods, such as '+', delegate to the 'reverse ...' method
# on the other object if the 'add' method fails --- and they provide a
# 'reverse +' method that does the right thing, in case the other
# object wants to delegate to it.  However, the 'reverse ...' methods
# depend on the operations being commutative; this is in exchange for
# the relatively loose requirements on 'coerce' described above.

# No implementations are provided for 'reverse **' and 'reverse %'.

normal_commutative_number = 
    prog.sys.object {self:
        coerce = {op: arg1 = 1, '()' = op.arg1}
        binop = {op: arg1 = 1, other = self.coerce(op.arg1)}

        normal_binop = self.binop {op:
            op = self.add
            revop = op.arg1.'reverse +'
            result = op.op(op.other)
            commuted = op {op: '()' = op.result}
            '()' = op.result !! op.revop(self)
        }
        '+' = self.normal_binop
        'reverse +' = self.'+'.commuted
        subtract = {op: arg1=2, '()' = self.add(op.arg1.negated)}
        '-' = self.normal_binop {op: op = self.subtract, 
            revop = op.arg1.'reverse -'}
        'reverse -' = self.binop {op: '()' = self.negated.add(op.other)}

        '*' = self.normal_binop {op: op = self.multiply, 
            revop = op.arg1.'reverse *' }
        'reverse *' = self.'*'.commuted
        divide = {op: arg1=2, '()' = self.multiply(op.arg1.reciprocal)}
        '/' = self.normal_binop {op: op = self.divide, 
            revop = op.arg1.'reverse /'}
        'reverse /' = self.binop {op: '()' = self.reciprocal.multiply(op.other)}

        '%' = self.normal_binop {op: op = self.modulo, 
            revop = op.arg1.'reverse %' }
        '**' = self.normal_binop {op: op = self.power, 
            revop = op.arg1.'reverse **'}

        '<' = self.normal_binop {op: op = self.less_than, 
            revop = op.arg1.'reverse <'}
        greaterthan = {op: '()' = op.arg1 < self}
        '>' = self.normal_binop {op: op = self.greaterthan, 
            revop = op.arg1.'reverse >'}
        'reverse <' = self.'>'.commuted
        'reverse >' = self.'<'.commuted
        # BEWARE! As the Common Lisp HyperSpec says about EQUALP:
        # Object equality is not a concept for which there is a uniquely
        # determined correct algorithm. The appropriateness of an equality
        # predicate can be judged only in the context of the needs of some
        # particular program. Although these functions take any type of
        # argument and their names sound very generic, equal and equalp
        # are not appropriate for every application.
        equals = {op: arg1 = 2
            '()' = (self < op.arg1).not && (self > op.arg1).not}
        '==' = self.normal_binop {op: op = self.equals, 
            revop = op.arg1.'reverse =='}
        'reverse ==' = self.'=='.commuted
        inverse_comparator = self.normal_binop {op:
            baseop = self.less_than
            revop = op.arg1.'reverse >='
            op = {ge: '()' = op.baseop(ge.arg1).not}
        }
        '>=' = self.inverse_comparator
        '<=' = self.inverse_comparator {op: baseop = self.greaterthan, 
            revop = op.arg1.'reverse <='}
        'reverse >=' = self.'<='.commuted
        'reverse <=' = self.'>='.commuted
        '!=' = self.inverse_comparator {op: baseop = self.'==',
            revop = op.arg1.'reverse !='}
        'reverse !=' = self.'!='.commuted
        between = {op: arg1 = 2.negated, arg2 = 2
            '()' = (self >= op.arg1) && (self < op.arg2)}
        negative = self < 0
        positive = self.negative.not
        abs = self.negative.if_true(then=self.negated, else=self)
    }

# XXX Doesn't work because prog.if doesn't exist yet because we don't 
# have variadic functions because we don't have lists.
rational = 
    prog.sys.normal_commutative_number {self:
        numer = 1
        denom = 2
        new = {op: arg1 = 6, arg2 = 9,
            g = op.arg1.gcd(op.arg2)
            numer = op.arg1 / op.g
            denom = op.arg2 / op.g
            denom_is_1 = op.denom.is_ok && (op.denom == 1)
            ok_test = (op.numer * op.denom)
            # XXX note that prog.if doesn't exist yet!
            '()' = prog.if(
                op.denom_is_1 -> op.numer,
                op.ok_test.is_ok -> self { numer = op.numer, denom = op.denom },
                else = op.ok_test)
        }
        show = \"{numer} <hr> {denom}\" % self
        coerce = {op: arg1 = 2
            '()' = prog.if(
                op.arg1.denom.is_ok -> op.arg1,
                op.arg1.as_integer.is_ok -> self.new(op.arg1.as_integer, 1),
                else = prog.error(\"could not coerce {arg1} to rational\" % op)
            )
        }
        add = {op: arg1 = self.new(2, 3),
            '()' = self.new(
                (self.numer * op.arg1.denom) + (self.denom * op.arg1.numer),
                self.denom * op.arg1.denom
            )
        }
        negated = self.new(self.numer.negated, self.denom)
        multiply = self.add {op:
            '()' = self.new(self.numer * op.arg1.numer, 
                            self.denom * op.arg1.denom)
        }
        reciprocal = self.new(self.denom, self.numer)
        less_than = {op: arg1 = self.new(2, 4),
            '()' = (self.numer * op.arg1.denom) < (self.denom * op.arg1.numer)
        }
        as_float = self.numer.as_float / self.denom.as_float
        as_integer = prog.if(self.denom == 1, 
            then=self.numer,
            else=prog.error(\"{frac} is not an integer\" % {frac=self}))
    }

# prog.sys.native_integer:

# The idea is that the interpreter will derive things from this object
# by overriding their \"userdata\" to point to an opaque integer
# object that can be passed to the native methods that do show,
# as_float, add, negated, multiply, divmod, and less_than.

# I'm still undecided about what those operations should return ---
# just the userdata result (making this code a little uglier) or the
# native_integer object (in which case the primitive arithmetic
# operations as well as the parser have to know about the
# prog.sys.native_integer name)?  For now I'm going with the former.

native_integer =
    prog.sys.normal_commutative_number {self:
        userdata = 5.userdata # as an example
        new = {new: arg1 = 6.userdata
            '()' = prog.sys.native_integer { userdata = new.arg1 }}
        intrinsics = intrinsics{self.userdata}
        show = prog.sys.native_string.new(self.intrinsics.integer_show)
        as_integer = self
        as_float = prog.sys.native_float.new(self.intrinsics.integer_as_float)
        coerce = {op: arg1 = 2, '()' = op.arg1.as_integer}
        intrinsic_op = {op: arg1 = 3,
            intrinsics = self.intrinsics {
                arg2 = op.arg1.userdata
                # these two are for less_than:
                true = prog.sys.bool.true
                false = prog.sys.bool.false
            }
            userdata = op.intrinsics.integer_add
            '()' = self.new(op.userdata)   # by default
        }
        add = self.intrinsic_op
        negated = self.new(self.intrinsics.integer_negated)
        multiply = self.intrinsic_op {op:
            userdata = op.intrinsics.integer_multiply
        }
        modulo = self.intrinsic_op {op:
            userdata = op.intrinsics.integer_divmod.mod
        }
        # As Jamie McCarthy points out, rationals made out of machine
        # integers are really pretty limited... but for now I'm using them
        # anyway.
        divide = {op: arg1 = 2, '()' = prog.sys.rational.new(self, op.arg1)}
        reciprocal = 1 / self
        gcd = self.binop {op: arg1 = 5
            '()' = (op.arg1 == 0).if_true(then = self, 
                else = op.arg1.gcd(self % op.arg1))
        }
        less_than = self.intrinsic_op {op:
            '()' = op.intrinsics.integer_less_than
        }

        # Here are some faster versions of ops with less indirection, 
        # but which fail badly in mixed-mode arithmetic.
        #'+' = {op: '()' = self.new(
        #    intrinsics{self.userdata, op.arg1.userdata}.integer_add)}
        #'-' = {op: '()' = self.new(
        #    intrinsics{self.userdata, op.arg1.userdata}.integer_subtract)}
        #'<' = {op:
        #    '()' = intrinsics{
        #        self.userdata, op.arg1.userdata,
        #        true=self.true,
        #        false=self.false
        #    }.integer_less_than
        #}
    }

# The float userdata has divide instead of modulo; otherwise it's
# quite similar to the integer.  My original design let it inherit
# pretty much all the methods, because the primitives were properties
# of the userdata, so they were named things like 'show' instead of
# 'integer_show', but I decided sticking all the operations in an
# 'intrinsics' namespace was simpler.

native_float = prog.sys.native_integer {self:
        userdata = 3.2.userdata
        new = {new: arg1 = 3.5.userdata
            '()' = prog.sys.native_float { userdata = new.arg1 }}
        show = prog.sys.native_string.new(self.intrinsics.float_show)
        as_float = self
        as_integer = prog.error(\"Can't coerce floats to integers\")
        modulo = prog.error(\"Can't take modulo of floats yet\")
        coerce = {op: arg1 = 2, '()' = op.arg1.as_float}
        add = self.intrinsic_op {op:
            userdata = op.intrinsics.float_add
        }
        negated = self.new(self.intrinsics.float_negated)
        multiply = self.intrinsic_op {op:
            userdata = op.intrinsics.float_multiply
        }
        divide = self.intrinsic_op {op:
            userdata = op.intrinsics.float_divide
        }
        less_than = self.intrinsic_op {op:
            '()' = op.intrinsics.float_less_than
        }
    }

native_string = prog.sys.object {self:
        userdata = \"bethmolnar\".userdata
        new = {new: arg1 = \"matthew\".userdata
            '()' = prog.sys.native_string { userdata = new.arg1 }}
        show = self
    }

}}" ;;

let getarg arg env = eval env (Call(Name "method", arg)) ;;
let arg1 = getarg "arg1" ;;
let arg2 = getarg "arg2" ;;
let primitive_binary myfun = 
  NativeMethod(fun env -> myfun (arg1 env, arg2 env) env) ;;
let integer_binary op = primitive_binary(fun args env ->
  match args with
      (UserData (UserInteger xi), UserData (UserInteger yi)) ->
        op xi yi env
    | (x, y) -> Error ("invalid binary operation args", 
                      show_bicobj x ^ ", " ^ show_bicobj y)) ;;
let integer_unary op = primitive_binary(fun args env ->
  match args with (UserData (UserInteger xi), _) -> op xi env
    | (x, _) -> Error ("invalid unary operation arg", show_bicobj x)) ;;
let intret op a b env = UserData (UserInteger (op a b)) ;;
let true_expr = parse "method.true" ;;
let false_expr = parse "method.false" ;;
let boolret op a b env = eval env (if op a b then true_expr else false_expr) ;;
let defintrinsic name contents rest = 
  derive name (Some "method") contents Phi rest ;;

let integer_intrinsics = 
  (defintrinsic "integer_multiply"  (integer_binary (intret ( * )))
  (defintrinsic "integer_add"       (integer_binary (intret (+)))
  (* integer_subtract: not currently used *)
  (defintrinsic "integer_subtract"  (integer_binary (intret (-)))
  (defintrinsic "integer_less_than" (integer_binary (boolret (<)))
  (defintrinsic "integer_negated"   (integer_unary (fun x env ->
    UserData (UserInteger ~-x)))
  (defintrinsic "integer_show"      (integer_unary (fun x env ->
    UserData (UserString (string_of_int x))))
        (* XXX also need divmod *)
  ProtoObject)))))) ;;

let float_binary op = primitive_binary(fun args env ->
  match args with
      (UserData (UserFloat xi), UserData (UserFloat yi)) ->
        op xi yi env
    | (x, y) -> Error ("invalid binary operation args", 
                      show_bicobj x ^ ", " ^ show_bicobj y)) ;;
let float_unary op = primitive_binary(fun args env ->
  match args with (UserData (UserFloat xi), _) -> op xi env
    | (x, _) -> Error ("invalid unary operation arg", show_bicobj x)) ;;
let floatret op a b env = UserData (UserFloat (op a b)) ;;

let basic_intrinsics = 
  (defintrinsic "float_show"       (float_unary (fun x env ->
    UserData (UserString (string_of_float x))))
  (defintrinsic "float_add"        (float_binary (floatret (+.)))
  (defintrinsic "float_negated"    (float_unary (fun x env -> 
    UserData (UserFloat ~-.x)))
  (defintrinsic "float_multiply"   (float_binary (floatret ( *.)))
  (defintrinsic "float_divide"     (float_binary (floatret (/.)))
  (defintrinsic "float_less_than"  (float_binary (boolret (<)))
  (defintrinsic "integer_as_float" (integer_unary (fun x env ->
    UserData (UserFloat (float_of_int x))))
  integer_intrinsics))))))) ;;

let basic_prog = eval (Add("intrinsics", basic_intrinsics, Phi)) 
  (parse prog_sys_source) ;;
let eval_with_lib expr = eval (Add("intrinsics", basic_intrinsics, 
                               Add("prog", basic_prog, Phi)))
  (Call(Derivation(Name "prog", Some "prog",
    Definition("()", expr, NoDefs)), "()")) ;;

let unit_tests () =
    (* really complicated evaluation *)
    let lib_eval_is str userdata =
      let actual_result = (eval_with_lib (parse ("(" ^ str ^ ").userdata")))
      in let rv = actual_result = UserData(userdata)
      in (if not rv then print_endline (show_bicobj actual_result));
        rv
    in
    (* basic boolean behavior *)
    assert(lib_eval_is "prog.sys.bool.true.show" (UserString "true"));
    assert(lib_eval_is "prog.sys.bool.false.show" (UserString "false"));
    assert(lib_eval_is "prog.sys.bool.true.not.show" (UserString "false"));
    assert(lib_eval_is "prog.sys.bool.false.not.show" (UserString "true"));
    assert(lib_eval_is "(prog.sys.bool.true && prog.sys.bool.true).show"
            (UserString "true"));
    assert(lib_eval_is "(prog.sys.bool.true && prog.sys.bool.false).show"
            (UserString "false"));
    assert(lib_eval_is "(prog.sys.bool.false && prog.sys.bool.false).show"
            (UserString "false"));
    assert(lib_eval_is "(prog.sys.bool.false && prog.sys.bool.true).show"
            (UserString "false"));
    assert(lib_eval_is "(prog.sys.bool.true || prog.sys.bool.true).show"
            (UserString "true"));
    assert(lib_eval_is "(prog.sys.bool.true || prog.sys.bool.false).show"
            (UserString "true"));
    assert(lib_eval_is "(prog.sys.bool.false || prog.sys.bool.false).show"
            (UserString "false"));
    assert(lib_eval_is "(prog.sys.bool.false || prog.sys.bool.true).show"
            (UserString "true"));
    (* basic integer arithmetic *)
    assert(lib_eval_is "3.negated" (UserInteger (-3)));
    assert(lib_eval_is "3 + 4" (UserInteger (7)));
    assert(lib_eval_is "(3 + 4).show" (UserString "7"));
    assert(lib_eval_is "\"foo\".show" (UserString "foo"));
    assert(lib_eval_is "prog.sys.bool.true.if_true(then=3, else=4)"
            (UserInteger 3));
    assert(lib_eval_is "(3 > 4).show" (UserString "false"));
    assert(lib_eval_is "(3 < 4).show" (UserString "true"));
    assert(lib_eval_is "(3 < 4).if_true(then=5, else=6)" (UserInteger 5));
    assert(lib_eval_is "(3 > 4).if_true(then=5, else=6)" (UserInteger 6));
    assert(lib_eval_is "(3 > 4).if_false(then=5, else=6)" (UserInteger 5));
    assert(lib_eval_is "{env:
        fac = {fac:
            arg1 = 3
            '()' = (fac.arg1 < 2).if_true(
                then = 1
                else = fac.arg1 * env.fac(fac.arg1 - 1)
            )
        }
    }.fac(5)" (UserInteger 120));
    assert(lib_eval_is "3 * 4" (UserInteger 12));
    (* basic integer comparisons *)
    assert(lib_eval_is "(3 == 3).show" (UserString "true"));
    assert(lib_eval_is "(3 == 4).show" (UserString "false"));

    (* floats *)
    assert(lib_eval_is "3.5" (UserFloat 3.5));
    assert(lib_eval_is "3.5.show" (UserString "3.5"));
    assert(lib_eval_is "3.5 + 4.0" (UserFloat 7.5));
    assert(lib_eval_is "4.5 - 3.0" (UserFloat 1.5));
    assert(lib_eval_is "0.5 * 0.25" (UserFloat 0.125));
    assert(lib_eval_is "3.5 / 2.0" (UserFloat 1.75));
    assert(lib_eval_is "1.0 + 1.0 + 1.0" (UserFloat 3.0));
    assert(lib_eval_is "(3.5 < 4.0).show" (UserString "true"));
    assert(lib_eval_is "(3.5 > 4.0).show" (UserString "false"));
    assert(lib_eval_is "(4.0 > 3.5).show" (UserString "true"));
    assert(lib_eval_is "(3.5 == 3.5).show" (UserString "true"));

    (* a little bit of mixed-mode arithmetic *)
    assert(lib_eval_is "3.5 / 2" (UserFloat 1.75));
    (* This one doesn't work yet because it depends on prog.error 
       existing and having the right '!!' behavior *)
(*     assert(lib_eval_is "2 * 1.5" (UserFloat 3.0)); *)
;;
unit_tests();;


And then, so you can use it interactively, there's this
read-eval-print loop.  It prints some minimal profiling information on
exit.

(* -*- mode: tuareg; compile-command: "./build" -*- *)
(* something that lets you try out expressions *)
(* Bicicleta language interpreter 
 Copyright (C) 2007  Kragen Javier Sitaker 

 This program is free software; you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 2 of the License, or 
 (at your option) any later version. 

 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 GNU General Public License for more details. 

 You should have received a copy of the GNU General Public License 
 along with this program; if not, write to the Free Software 
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *)
open Bicicleta ;;
open Bicicleta_syntax ;;
open Bicicleta_lib ;;

let call_count_report () =
  let f key value tail = (value, key) :: tail
  in let calls_list = Hashtbl.fold f call_count []
  in (List.fold_left (fun t (v, k) -> t + v) 0 calls_list,
      List.sort (Pervasives.compare) calls_list) ;;

let rec main inp outp = 
  try
    output_string outp "'()' = "; flush outp;
    (try 
        let inval = input_line inp in
        let inexpr = parse inval 
        in let rv = eval_with_lib inexpr
        in let show = eval (Add("rv", rv, Phi)) (parse "rv.show.userdata")
        in 
             output_string outp (show_bicobj show ^ "\n");
          flush outp;
      with
          Parsing.Parse_error -> output_string outp "parse error\n"
    );
    main inp outp
  with End_of_file -> 
    let total, per_method = call_count_report ()
    in List.iter (fun (v, k) -> 
      output_string outp (k ^ ": " ^ string_of_int v ^ "\n"))
      (List.rev per_method);
      output_string outp ("total method calls: " ^ string_of_int total ^ "\n")
;;
Hashtbl.clear call_count ;;
print_endline "Bicicleta version 3, Copyright (C) 2007  Kragen Javier Sitaker
Bicicleta comes with ABSOLUTELY NO WARRANTY; for details see the file
\"COPYING\".
This is free software, and you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
" ;;
main stdin stdout ;;


More information about the Kragen-hacks mailing list