caml2html_test.mli

   1: type 'a weird = E10
   2: module Zéro'04 :
   3:   sig val characters : char list val n : int val the_Truth : bool end
   4: val hel'Lo : string
   5: val ( |* ) : int -> int -> int

caml2html_test.ml

   1: 
   2: (* Test file for caml2html (the first line is empty) *)
   3: 
   4: (* -hc option: link to caml2html_test.mli (same page, colorized)
   5:  *             link to caml2html_test.ml (source) *)
   6: 
   7: # 123 (* line directives are not parsed, sorry... *)
   8: 
   9: (* This is a multi-line "*)"
  10:    comment *)
  11: 
  12: open Printf
  13: 
  14: type 'aa' weird = E10
  15: 
  16: (* nested (* comments *) *)
  17: 
  18: module Zéro'04 = 
  19: struct
  20:   let characters = [ 'a'; '\000'; '\x12'; '
  21: '; '\t'; 'z' ]
  22:   let n = 0X12 + truncate 1.2E-1_2
  23:   let the_Truth =
  24:     let ignore4 a b c d = false in
  25:     not (ignore4 1._0_None 1.0E10E10)
  26: end
  27: 
  28: let hel'Lo = "\"Hello \
  29:                 World!\""
  30: 
  31: let ( |* ) a b =
  32:   match a, b with
  33:       1, 0 | 0, 1 -> 1
  34:     | _ -> 0
  35: 
  36: let _ =
  37:   assert true;
  38:   if 0 mod 1 < 1 && `Abc <> `def then
  39:     print_endline hel'Lo

input.mli

   1: (* 
   2:    Copyright 2004 Martin Jambon
   3: 
   4:    This file is distributed under the terms of the GNU Public License
   5:    http://www.gnu.org/licenses/gpl.txt
   6: *)
   7: 
   8: (*
   9:    This module provides functions that parse OCaml source code and return
  10:    a list of tokens which are suitable for automatic syntax highlighting.
  11:    Any input is accepted. Only a lexical analysis is performed and thus can
  12:    be used to highlight incorrect programs as well as derivatives
  13:    of OCaml (.ml .mli .mll .mly).
  14: *)
  15: 
  16: type token =
  17:   [ `Comment of string   (** a (fragment of) comment *)
  18:   | `Construct of string (** an uppercase identifier or
  19:                              an identifier starting with ` *)
  20:   | `Keyword of string   (** a keyword *)
  21:   | `Newline             (** a newline character *)
  22:   | `String of string    (** a (fragment of) string or character literal *)
  23:   | `Tab                 (** a tabulation character *)
  24:   | `Token of string ]   (** anything else *)
  25: 
  26: val parse : Lexing.lexbuf -> token list
  27: val string : string -> token list
  28: val channel : in_channel -> token list
  29: val file : string -> token list

input.mll

   1: {
   2: (*
   3:   Copyright 2002-2004 Sebastien Ailleret
   4:   Copyright 2004 Martin Jambon
   5: 
   6:   This file is distributed under the terms of the GNU Public License
   7:   http://www.gnu.org/licenses/gpl.txt
   8: *)
   9: 
  10: open Lexing
  11: 
  12: let keywords = [
  13:   "and";
  14:   "as";
  15:   "asr";
  16:   "assert";
  17:   "begin";
  18:   "class";
  19:   "constraint";
  20:   "do";
  21:   "done";
  22:   "downto";
  23:   "else";
  24:   "end";
  25:   "exception";
  26:   "external";
  27:   "false";
  28:   "for";
  29:   "fun";
  30:   "function";
  31:   "functor";
  32:   "if";
  33:   "in";
  34:   "include";
  35:   "inherit";
  36:   "initializer";
  37:   "land";
  38:   "lazy";
  39:   "let";
  40:   "lor";
  41:   "lsl";
  42:   "lsr";
  43:   "lxor";
  44:   "match";
  45:   "method";
  46:   "mod";
  47:   "module";
  48:   "mutable";
  49:   "new";
  50:   "object";
  51:   "of";
  52:   "open";
  53:   "or";
  54:   "private";
  55:   "rec";
  56:   "sig";
  57:   "struct";
  58:   "then";
  59:   "to";
  60:   "true";
  61:   "try";
  62:   "type";
  63:   "val";
  64:   "virtual";
  65:   "when";
  66:   "while";
  67:   "with" ]
  68: 
  69: let depth = ref 0
  70: 
  71: let is_keyword =
  72:   let tbl = Hashtbl.create 100 in
  73:   List.iter (fun key -> Hashtbl.add tbl key ()) keywords;
  74:   Hashtbl.mem tbl
  75: 
  76: let tokenify s =
  77:   if is_keyword s then `Keyword s
  78:   else `Token s
  79: 
  80: let buf = Buffer.create 256
  81: 
  82: let token_list = ref []
  83: let add_token x = token_list := x :: !token_list
  84: 
  85: }
  86: 
  87: let upper = ['A'-'Z' '\192'-'\214' '\216'-'\222']
  88: let lower = ['a'-'z' '\223'-'\246' '\248'-'\255']
  89: let digit = ['0'-'9']
  90: let identchar = upper | lower | digit | ['_' '\'']
  91: let hex = ['0'-'9' 'a'-'f' 'A'-'F']
  92: let oct = ['0'-'7']
  93: let bin = ['0'-'1']
  94: 
  95: let operator_char = 
  96:   [ '!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
  97: let infix_symbol =
  98:   ['=' '<' '>' '@' '^' '|' '&' '+' '-' '*' '/' '$' '%'] operator_char*
  99: let prefix_symbol = ['!' '?' '~'] operator_char*
 100: 
 101: rule token = parse
 102: | "(*"
 103:     { Buffer.clear buf;
 104:       Buffer.add_string buf "(*";
 105:       depth := 1;
 106:       comment lexbuf;
 107:       add_token (`Comment (Buffer.contents buf));
 108:       token lexbuf }
 109: | '"'
 110:     { Buffer.clear buf;
 111:       Buffer.add_char buf '"';
 112:       string lexbuf;
 113:       add_token (`String (Buffer.contents buf));
 114:       token lexbuf }
 115: | (('`' (upper | lower)) | upper) identchar *
 116:       { add_token (`Construct (lexeme lexbuf));
 117:         token lexbuf }
 118: | lower identchar *
 119:       { add_token (tokenify (lexeme lexbuf));
 120:         token lexbuf  }
 121: 
 122: | "!=" | "#" | "&" | "&&" | "(" | ")" | "*" | "+" | "," | "-"
 123: | "-." | "->" | "." | ".. :" | "::" | ":=" | ":>" | ";" | ";;" | "<"
 124: | "<-" | "=" | ">" | ">]" | ">}" | "?" | "??" | "[" | "[<" | "[>" | "[|"
 125: | "]" | "_" | "`" | "{" | "{<" | "|" | "|]" | "}" | "~"
 126: 
 127:     { add_token (`Keyword (lexeme lexbuf));
 128:       token lexbuf }
 129: 
 130: | prefix_symbol | infix_symbol
 131:       { add_token (`Token (lexeme lexbuf));
 132:         token lexbuf }
 133: 
 134: | "'\n'" | "'\r\n'" 
 135:       { List.iter add_token [`String "'"; `Newline; `String "'"];
 136:         token lexbuf }
 137: | "'\\\n'" | "'\\\r\n'"
 138:       { List.iter add_token [`String "'\\"; `Newline; `String "'"];
 139:         token lexbuf }
 140: | "'" ([^'\'''\\'] | '\\' (_ | digit digit digit | 'x' hex hex)) "'"
 141:       { add_token (`String (lexeme lexbuf));
 142:         token lexbuf }
 143: 
 144: | '\r'? '\n'
 145:     { add_token `Newline;
 146:       token lexbuf }
 147: | '\t'
 148:     { add_token `Tab;
 149:       token lexbuf }
 150: | eof
 151:     { let l = List.rev !token_list in
 152:       token_list := [];
 153:       l }
 154: | ' '+
 155:     { add_token (`Token (lexeme lexbuf));
 156:       token lexbuf }
 157: 
 158: | '-'? (digit (digit | '_')*
 159:        | ("0x"| "0X") hex (hex | '_')*
 160:        | ("0o"| "0O") oct (oct | '_')*        
 161:        | ("0b"| "0B") bin (bin | '_')* )
 162: 
 163: | '-'? digit (digit | '_')* ('.' (digit | '_')* )? 
 164:       (['e' 'E'] ['+' '-']? digit (digit | '_')* )? 
 165: | _
 166:     { add_token (`Token (lexeme lexbuf));
 167:       token lexbuf }
 168: 
 169: and comment = parse
 170: | "(*"
 171:     { incr depth;
 172:       Buffer.add_string buf "(*";
 173:       comment lexbuf }
 174: | "*)"
 175:     { decr depth; 
 176:       Buffer.add_string buf "*)";
 177:       if (!depth > 0) then comment lexbuf }
 178: | '"'
 179:     { Buffer.add_char buf '"';
 180:       string lexbuf;
 181:       comment lexbuf }
 182: | eof
 183:     { () }
 184: | '\r'? '\n'
 185:     { add_token (`Comment (Buffer.contents buf));
 186:       add_token `Newline;
 187:       Buffer.clear buf;
 188:       comment lexbuf }
 189: | '\t'
 190:     { add_token (`Comment (Buffer.contents buf));
 191:       add_token `Tab;
 192:       Buffer.clear buf;
 193:       comment lexbuf }
 194: | _
 195:     { Buffer.add_string buf (lexeme lexbuf);
 196:       comment lexbuf }
 197: 
 198: and string = parse
 199: | '"'
 200:     { Buffer.add_char buf '"' }
 201: | "\\\\"
 202: | '\\' '"'
 203:     { Buffer.add_string buf (lexeme lexbuf);
 204:       string lexbuf }
 205: | eof
 206:     { () }
 207: | '\r'? '\n'
 208:     { add_token (`String (Buffer.contents buf));
 209:       add_token `Newline;
 210:       Buffer.clear buf;
 211:       string lexbuf }
 212: | '\t'
 213:     { add_token (`String (Buffer.contents buf));
 214:       add_token `Tab;
 215:       Buffer.clear buf;
 216:       string lexbuf }
 217: | _
 218:     { Buffer.add_string buf (lexeme lexbuf);
 219:       string lexbuf }
 220: 
 221: {
 222:   type token = [ `Comment of string
 223:                | `Construct of string
 224:                | `Keyword of string
 225:                | `Newline
 226:                | `String of string
 227:                | `Tab
 228:                | `Token of string ]
 229:   
 230:   let parse = token
 231:   let string s = let lexbuf = Lexing.from_string s in token lexbuf
 232:   let channel ic = let lexbuf = Lexing.from_channel ic in token lexbuf
 233:   let file s = 
 234:     let ic = open_in s in
 235:     let l = channel ic in
 236:     close_in ic;
 237:     l
 238: }

output.ml

   1: (* 
   2:    Copyright 2002-2004 Sébastien Ailleret
   3:    Copyright 2004 Martin Jambon
   4:    
   5:    This file is distributed under the terms of the GNU Public License
   6:    http://www.gnu.org/licenses/gpl.txt
   7: *)
   8: 
   9: (*
  10:    This module provides functions that parse OCaml source code and return
  11:    a list of tokens which are suitable for automatic syntax highlighting.
  12:    Any input is accepted. Only a lexical analysis is performed and thus can
  13:    be used to highlight incorrect programs as well as derivatives
  14:    of OCaml (.ml .mli .mll .mly).
  15: *)
  16: 
  17: open Printf
  18: 
  19: let version = "caml2html 1.2.3"
  20: 
  21: let key_color1 = Some "green"
  22: let key_color2 = Some "#77aaaa"
  23: let key_color3 = Some "#cc9900"
  24: let key_color4 = Some "#990099"
  25: let key_color5 = Some "#808080"
  26: 
  27: let construct_color = (Some "#0033cc", "Cconstructor")
  28: let comment_color = (Some "#990000", "Ccomment")
  29: let string_color = (Some "#aa4444", "Cstring")
  30: 
  31: let alpha_keyword_color = (key_color5, "Calphakeyword")
  32: let nonalpha_keyword_color = (None, "Cnonalphakeyword")
  33: 
  34: let default_keyword_color_list =
  35:   [
  36:     "and", (key_color1, "Cand");
  37:     "as", (key_color1, "Cas");
  38:     "class", (key_color1, "Cclass");
  39:     "constraint", (key_color1, "Cconstraint");
  40:     "exception", (key_color1, "Cexception");
  41:     "external", (key_color1, "Cexternal");
  42:     "fun", (key_color1, "Cfun");
  43:     "function", (key_color1, "Cfunction");
  44:     "functor", (key_color1, "Cfunctor");
  45:     "in", (key_color1, "Cin");
  46:     "inherit", (key_color1, "Cinherit");
  47:     "initializer", (key_color1, "Cinitializer");
  48:     "let", (key_color1, "Clet");
  49:     "method", (key_color1, "Cmethod");
  50:     "module", (key_color1, "Cmodule");
  51:     "mutable", (key_color1, "Cmutable");
  52:     "of", (key_color1, "Cof");
  53:     "private", (key_color1, "Cprivate");
  54:     "rec", (key_color1, "Crec");
  55:     "type", (key_color1, "Ctype");
  56:     "val", (key_color1, "Cval");
  57:     "virtual", (key_color1, "Cvirtual");
  58:     
  59:     "do", (key_color2, "Cdo");
  60:     "done", (key_color2, "Cdone");
  61:     "downto", (key_color2, "Cdownto");
  62:     "else", (key_color2, "Celse");
  63:     "for", (key_color2, "Cfor");
  64:     "if", (key_color2, "Cif");
  65:     "lazy", (key_color2, "Clazy");
  66:     "match", (key_color2, "Cmatch");
  67:     "new", (key_color2, "Cnew");
  68:     "or", (key_color2, "Cor");
  69:     "then", (key_color2, "Cthen");
  70:     "to", (key_color2, "Cto");
  71:     "try", (key_color2, "Ctry");
  72:     "when", (key_color2, "Cwhen");
  73:     "while", (key_color2, "Cwhile");
  74:     "with", (key_color2, "Cwith");
  75:     
  76:     "assert", (key_color3, "Cassert");
  77:     "include", (key_color3, "Cinclude");
  78:     "open", (key_color3, "Copen");
  79:     
  80:     "begin", (key_color4, "Cbegin");
  81:     "end", (key_color4, "Cend");
  82:     "object", (key_color4, "Cobject");
  83:     "sig", (key_color4, "Csig");
  84:     "struct", (key_color4, "Cstruct");
  85:     
  86:     "raise", (Some "red", "Craise");
  87: 
  88:     "asr", (key_color5, "Casr");
  89:     "land", (key_color5, "Cland");
  90:     "lor", (key_color5, "Clor");
  91:     "lsl", (key_color5, "Clsl");
  92:     "lsr", (key_color5, "Clsr");
  93:     "lxor", (key_color5, "Clxor");
  94:     "mod", (key_color5, "Cmod");
  95:     
  96:     "false", (None, "Cfalse");
  97:     "true", (None, "Ctrue");
  98: 
  99:     "|", (key_color2, "Cbar");
 100:   ]
 101: 
 102: let default_keyword_colors =
 103:   let tbl = Hashtbl.create 100 in
 104:   List.iter
 105:     (fun (s, (color, css_class)) -> 
 106:        Hashtbl.add tbl s (color, css_class))
 107:     default_keyword_color_list;
 108:   tbl
 109: 
 110: let all_colors =
 111:   construct_color ::
 112:     comment_color ::
 113:     string_color ::
 114:     alpha_keyword_color ::
 115:     nonalpha_keyword_color ::
 116:     (List.map snd default_keyword_color_list)
 117: 
 118: let make_css ?(colors = all_colors) file =
 119:   let oc = open_out file in
 120:   let color_groups = 
 121:     Hashtbl2.list_all (Hashtbl2.of_list 50 colors) in
 122:   List.iter 
 123:     (fun (opt, l) -> 
 124:        let contents =
 125:          match opt with 
 126:              None -> ""
 127:            | Some color -> "color: " ^ color in
 128:        fprintf oc ".%s { %s }\n" 
 129:          (String.concat ",\n." (List.sort String.compare l)) contents)
 130:     color_groups;
 131:   close_out oc
 132: 
 133: type param = 
 134:     { line_numbers : bool; 
 135:       title : bool;
 136:       tab_size : int;
 137:       footnote : bool;
 138:       css : bool;
 139:       css_url : string;
 140:       html_comments : bool;
 141:       charset : string }
 142: 
 143: let default_param =
 144:   { line_numbers = false; 
 145:     title = false;
 146:     tab_size = 8;
 147:     footnote = true;
 148:     css = false;
 149:     css_url = "style.css";
 150:     html_comments = false;
 151:     charset = "iso-8859-1" }
 152: 
 153: 
 154: let add_string buf nbsp s = 
 155:   String.iter
 156:     (function
 157:          '<' -> Buffer.add_string buf "&lt;"
 158:        | '>' -> Buffer.add_string buf "&gt;"
 159:        | '&' -> Buffer.add_string buf "&amp;"
 160:        | ' ' when nbsp -> Buffer.add_string buf "&nbsp;"
 161:        | c -> Buffer.add_char buf c)
 162:     s
 163: 
 164: 
 165: let line_comment p buf i =
 166:   if p.line_numbers then
 167:     bprintf buf "<span style=\"background-color:silver\">%4d:</span> " i
 168:       
 169: let colorize ?(comment = false) p buf nbsp (opt, clas) s =
 170:   let add =
 171:     if comment && p.html_comments then Buffer.add_string buf
 172:     else add_string buf nbsp in
 173:   if p.css then
 174:     (bprintf buf "<span class=\"%s\">" clas;
 175:      add s;
 176:      Buffer.add_string buf "</span>")
 177:   else
 178:     match opt with
 179:         None -> add s
 180:       | Some color ->
 181:           bprintf buf "<span style=\"color:%s\">" color;
 182:           add s;
 183:           Buffer.add_string buf "</span>"
 184: 
 185: let rec fold_left f accu l =
 186:   match l with
 187:       [] -> accu
 188:     | a :: rest -> fold_left f (f accu a rest) rest
 189: 
 190: let ocaml
 191:   ?(nbsp = false)
 192:   ?(keyword_colors = default_keyword_colors)
 193:   ?(param = default_param)
 194:   buf l =
 195:   
 196:   let last_line =
 197:     fold_left
 198:       (fun line token rest ->
 199:          match token with
 200:              `String s ->
 201:                colorize param buf nbsp string_color s;
 202:                line
 203:            | `Token s ->
 204:                add_string buf nbsp s;
 205:                line
 206:            | `Comment s ->
 207:                colorize ~comment:true param buf nbsp comment_color s;
 208:                line
 209:            | `Construct s ->
 210:                colorize param buf nbsp construct_color s;
 211:                line
 212:            | `Keyword k ->
 213:                (try 
 214:                   let color = Hashtbl.find keyword_colors k in
 215:                   colorize param buf nbsp color k;
 216:                 with Not_found -> 
 217:                   let color =
 218:                     match k.[0] with
 219:                         'a' .. 'z' -> alpha_keyword_color
 220:                       | _ -> nonalpha_keyword_color in
 221:                   colorize param buf nbsp color k);
 222:                line
 223:            | `Newline ->
 224:                Buffer.add_char buf '\n';
 225:                if rest <> [] then
 226:                  line_comment param buf line;
 227:                line + 1
 228:            | `Tab ->
 229:                if param.tab_size < 0 then Buffer.add_char buf '\t'
 230:                else add_string buf nbsp (String.make param.tab_size ' ');
 231:                line)
 232:       2 l in
 233:   ()
 234: 
 235: let ocamlcode
 236:   ?keyword_colors
 237:   ?(param = default_param)
 238:   ?(tag_open = "<code>")
 239:   ?(tag_close = "</code>")
 240:   s =
 241:   let buf = Buffer.create (10 * String.length s) in
 242:   Buffer.add_string buf tag_open;
 243:   line_comment param buf 1;
 244:   ocaml ?keyword_colors ~param ~nbsp:true buf (Input.string s);
 245:   Buffer.add_string buf tag_close;
 246:   Buffer.contents buf
 247: 
 248: let ocamlpre
 249:   ?keyword_colors
 250:   ?(param = default_param)
 251:   ?(tag_open = "<pre>")
 252:   ?(tag_close = "</pre>")
 253:   s =
 254:   let buf = Buffer.create (10 * String.length s) in
 255:   Buffer.add_string buf tag_open;
 256:   line_comment param buf 1;
 257:   ocaml ?keyword_colors ~param ~nbsp:false buf (Input.string s);
 258:   Buffer.add_string buf tag_close;
 259:   Buffer.contents buf
 260: 
 261: 
 262: let is_valid_anchor =
 263:   let re = Str.regexp "[A-Za-z][-A-Za-z0-9_:.]*$" in
 264:   fun s -> Str.string_match re s 0
 265:     
 266: let ocaml_file
 267:   ?(filename = "") 
 268:   ?keyword_colors
 269:   ?(param = default_param)
 270:   buf l =
 271:   
 272:   let anchor = 
 273:     if is_valid_anchor filename then 
 274:       sprintf "<a name=\"%s\"></a>" filename
 275:     else "" in
 276: 
 277:   if param.title then
 278:     (bprintf buf "<h1>%s<code>%s</code></h1>\n" anchor filename;
 279:      Buffer.add_string buf "\n<pre>")
 280:   else
 281:     bprintf buf "\n<pre>%s" anchor;
 282: 
 283:   line_comment param buf 1;
 284:   ocaml ?keyword_colors ~param buf l;
 285:   Buffer.add_string buf "</pre>\n"
 286: 
 287: 
 288: let begin_document 
 289:   ?(param = default_param)
 290:   buf files =
 291:   let rec make_title = function
 292:     | [] -> ()
 293:     | [a] -> Buffer.add_string buf a
 294:     | a :: l -> Printf.bprintf buf "%s, " a; make_title l in
 295:   bprintf buf "\
 296: <!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \
 297: \"http://www.w3.org/TR/html4/strict.dtd\">
 298: <html>
 299: <head>
 300:   <meta http-equiv=\"content-type\" content=\"text/html; charset=%s\">
 301:   <title>
 302: " param.charset;
 303:   make_title files;
 304:   Printf.bprintf buf
 305:     "</title>\n  <meta name=\"GENERATOR\" content=\"%s\">\n" version;
 306:   if param.css then
 307:     Printf.bprintf buf
 308:       "  <link rel=\"stylesheet\" href=\"%s\" type=\"text/css\">\n" 
 309:       param.css_url;
 310:   Buffer.add_string buf "</head>\n<body>\n"
 311: 
 312: 
 313: let end_document ?(param = default_param) buf =
 314:   if param.footnote then
 315:     Buffer.add_string buf "
 316: <hr>
 317: <p>
 318: <em>This document was generated using 
 319: <a href=\"http://martin.jambon.free.fr/caml2html.html\">caml2html</a></em>
 320: ";
 321:   Buffer.add_string buf "</body>\n</html>\n"
 322: 
 323: 
 324: let handle_file ?keyword_colors ?param buf filename =
 325:   let l = Input.file filename in
 326:   ocaml_file ?keyword_colors ?param ~filename buf l
 327: 
 328: let save_file ?(dir = "") buf file =
 329:   let dir_res_name =
 330:     if dir = "" then file
 331:     else Filename.concat dir file in
 332:   let chan_out = open_out dir_res_name in
 333:   Buffer.output_buffer chan_out buf;
 334:   close_out chan_out
 335: 
 336: let ocaml_document ?dir ?keyword_colors ?param files outfile =
 337:   let buf = Buffer.create 50_000 in
 338:   begin_document ?param buf files;
 339:   let rec tmp = function
 340:     | [] -> ()
 341:     | [x] -> handle_file ?keyword_colors ?param buf x
 342:     | x :: l ->
 343:         handle_file ?keyword_colors ?param buf x;
 344:         Buffer.add_string buf "\n<hr>\n";
 345:         tmp l in
 346:   tmp files;
 347:   end_document ?param buf;
 348:   save_file ?dir buf outfile

This document was generated using caml2html