ocaml-xml-light-2.2/000075500000000000000000000000001167637302700143655ustar00rootroot00000000000000ocaml-xml-light-2.2/Makefile000064400000000000000000000054441167637302700160340ustar00rootroot00000000000000# Makefile generated by OCamake # http://tech.motion-twin.com .SUFFIXES : .ml .mli .cmo .cmx .cmi .mll .mly INSTALLDIR=`ocamlc -where` CFLAGS= LFLAGS= -a LIBS= all: xml-light.cma test.exe doc opt: xml-light.cmxa test_opt.exe install: all opt cp xml-light.cmxa xml-light.a xml-light.cma xml.mli xmlParser.mli dtd.mli xml.cmi xmlParser.cmi dtd.cmi xml.cmx dtd.cmx xmlParser.cmx $(INSTALLDIR) doc: mkdir doc ocamldoc -sort -html -d doc xml.mli dtd.mli xmlParser.mli test.exe: xml-light.cma ocamlc xml-light.cma test.ml -o test.exe test_opt.exe: xml-light.cmxa ocamlopt xml-light.cmxa test.ml -o test_opt.exe xml-light.cma: xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo ocamlc -o xml-light.cma $(LFLAGS) $(LIBS) xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo xml-light.cmxa: xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx ocamlopt -o xml-light.cmxa $(LFLAGS) $(LIBS) xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx dtd.cmo: xml.cmi xml_lexer.cmi dtd.cmi dtd.cmx: xml.cmi xml_lexer.cmi dtd.cmi xml.cmo: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi xml.cmx: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi xmlParser.cmo: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi xmlParser.cmx: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi dtd.cmi: xml.cmi xml.cmi: xmlParser.cmi: dtd.cmi xml.cmi xml_lexer.cmi: dtd.cmi xml_parser.cmo: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi xml_parser.cmx: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi xml_lexer.cmo: xml_lexer.ml xml_lexer.cmi xml_lexer.cmx: xml_lexer.ml xml_lexer.cmi clean: rm -f xml-light.cma test.exe dtd.cmo dtd.cmi test.cmo test.cmi xml.cmo xml.cmi xmlParser.cmo xmlParser.cmi dtd.cmi xml.cmi xmlParser.cmi xml_lexer.cmi xml_lexer.cmo xml_lexer.ml xml_parser.mli xml_parser.cmi xml_parser.ml xml_parser.cmo rm -f xml_light.lib xml_light.a xml-light.cmxa test_opt.exe dtd.cmx dtd.obj dtd.o test.cmx test.obj test.o xml.cmx xml.obj xml.o xmlParser.cmx xmlParser.obj xmlParser.o xml_lexer.cmx xml_lexer.obj xml_lexer.o xml_parser.cmx xml_parser.obj xml_parser.o wclean: -@del xml-light.cma test.exe dtd.cmo dtd.cmi test.cmo test.cmi xml.cmo xml.cmi xmlParser.cmo xmlParser.cmi dtd.cmi xml.cmi xmlParser.cmi xml_lexer.cmi xml_lexer.cmo xml_lexer.ml xml_parser.mli xml_parser.cmi xml_parser.ml xml_parser.cmo 2>NUL -@del xml_light.lib xml_light.a xml-light.cmxa test_opt.exe dtd.cmx dtd.obj dtd.o test.cmx test.obj test.o xml.cmx xml.obj xml.o xmlParser.cmx xmlParser.obj xmlParser.o xml_lexer.cmx xml_lexer.obj xml_lexer.o xml_parser.cmx xml_parser.obj xml_parser.o 2>NUL # SUFFIXES .ml.cmo: ocamlc $(CFLAGS) -c $< .ml.cmx: ocamlopt $(CFLAGS) -c $< .mli.cmi: ocamlc $(CFLAGS) $< .mll.ml: ocamllex $< .mly.ml: ocamlyacc $< ocaml-xml-light-2.2/README000064400000000000000000000027601167637302700152520ustar00rootroot00000000000000Xml-Light Version 2.2 : ----------------------- Last version : http://tech.motion-twin.com Xml Light is a minimal Xml parser & printer for OCaml. It provide few functions to parse a basic Xml document into an OCaml data structure and to print back the data structures to an Xml document. Xml Light has also support for DTD (Document Type Definition). Install ------- make install by default, Xml Light is installed in the 'ocamlc -where' directory. you can change it by editing the Makefile. for Windows users, if you're using the MSVC version of ocaml and don't have cygwin tools installed, you can do : nmake all and then copy manually the files to the place you want. Usage ----- simple samples : -- parse / print an xml string --- let x = Xml.parse_string "TEXT" in Printf.printf "XML formated = \n%s" (Xml.to_string_fmt x); -- load an xml and a dtd , prove and print --- let x = Xml.parse_file "myfile.xml" in let dtd = Dtd.parse_file "myfile.dtd" in let x = Dtd.prove (Dtd.check dtd) "start" x in print_endline (Xml.to_string x) Documentation ------------- HTML documentation can be generated with ocamldoc : make doc you can also directly browse the MLI files to read it. Licence ------- Xml Light is LGPL Credits ------- (c)2003-2005 Nicolas Cannasse (ncannasse@motion-twin.com) (c)2003-2005 Motion-Twin Some parts of this code source has an additionnal copyright to Jacques Garrigue ocaml-xml-light-2.2/dtd.ml000064400000000000000000000352541167637302700155030ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) open Xml open Printf type parse_error_msg = | InvalidDTDDecl | InvalidDTDElement | InvalidDTDAttribute | InvalidDTDTag | DTDItemExpected type check_error = | ElementDefinedTwice of string | AttributeDefinedTwice of string * string | ElementEmptyContructor of string | ElementReferenced of string * string | ElementNotDeclared of string type prove_error = | UnexpectedPCData | UnexpectedTag of string | UnexpectedAttribute of string | InvalidAttributeValue of string | RequiredAttribute of string | ChildExpected of string | EmptyExpected type dtd_child = | DTDTag of string | DTDPCData | DTDOptional of dtd_child | DTDZeroOrMore of dtd_child | DTDOneOrMore of dtd_child | DTDChoice of dtd_child list | DTDChildren of dtd_child list type dtd_element_type = | DTDEmpty | DTDAny | DTDChild of dtd_child type dtd_attr_default = | DTDDefault of string | DTDRequired | DTDImplied | DTDFixed of string type dtd_attr_type = | DTDCData | DTDNMToken | DTDEnum of string list type dtd_item = | DTDAttribute of string * string * dtd_attr_type * dtd_attr_default | DTDElement of string * dtd_element_type type dtd_result = | DTDNext | DTDNotMatched | DTDMatched | DTDMatchedResult of dtd_child type error_pos = { eline : int; eline_start : int; emin : int; emax : int; } type parse_error = parse_error_msg * Xml.error_pos exception Parse_error of parse_error exception Check_error of check_error exception Prove_error of prove_error type dtd = dtd_item list type ('a,'b) hash = ('a,'b) Hashtbl.t type checked = { c_elements : (string,dtd_element_type) hash; c_attribs : (string,(string,(dtd_attr_type * dtd_attr_default)) hash) hash; } type dtd_state = { elements : (string,dtd_element_type) hash; attribs : (string,(string,(dtd_attr_type * dtd_attr_default)) hash) hash; mutable current : dtd_element_type; mutable curtag : string; state : (string * dtd_element_type) Stack.t; } let file_not_found = ref (fun _ -> assert false) let _raises e = file_not_found := e let empty_hash = Hashtbl.create 0 let pos source = let line, lstart, min, max = Xml_lexer.pos source in (Obj.magic { eline = line; eline_start = lstart; emin = min; emax = max; } : Xml.error_pos) let convert = function | Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl | Xml_lexer.EInvalidDTDElement -> InvalidDTDElement | Xml_lexer.EInvalidDTDTag -> InvalidDTDTag | Xml_lexer.EDTDItemExpected -> DTDItemExpected | Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute let parse source = try Xml_lexer.init source; (* local cast Dtd.dtd -> dtd *) let dtd = (Obj.magic Xml_lexer.dtd source : dtd) in Xml_lexer.close source; dtd with | Xml_lexer.DTDError e -> Xml_lexer.close source; raise (Parse_error (convert e,pos source)) let parse_string s = parse (Lexing.from_string s) let parse_in ch = parse (Lexing.from_channel ch) let parse_file fname = let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in try let x = parse (Lexing.from_channel ch) in close_in ch; x with e -> close_in ch; raise e let check dtd = let attribs = Hashtbl.create 0 in let hdone = Hashtbl.create 0 in let htodo = Hashtbl.create 0 in let ftodo tag from = try ignore(Hashtbl.find hdone tag); with Not_found -> try match Hashtbl.find htodo tag with | None -> Hashtbl.replace htodo tag from | Some _ -> () with Not_found -> Hashtbl.add htodo tag from in let fdone tag edata = try ignore(Hashtbl.find hdone tag); raise (Check_error (ElementDefinedTwice tag)); with Not_found -> Hashtbl.remove htodo tag; Hashtbl.add hdone tag edata in let fattrib tag aname adata = let h = (try Hashtbl.find attribs tag with Not_found -> let h = Hashtbl.create 1 in Hashtbl.add attribs tag h; h) in try ignore(Hashtbl.find h aname); raise (Check_error (AttributeDefinedTwice (tag,aname))); with Not_found -> Hashtbl.add h aname adata in let check_item = function | DTDAttribute (tag,aname,atype,adef) -> let utag = String.uppercase tag in ftodo utag None; fattrib utag (String.uppercase aname) (atype,adef) | DTDElement (tag,etype) -> let utag = String.uppercase tag in fdone utag etype; let check_type = function | DTDEmpty -> () | DTDAny -> () | DTDChild x -> let rec check_child = function | DTDTag s -> ftodo (String.uppercase s) (Some utag) | DTDPCData -> () | DTDOptional c | DTDZeroOrMore c | DTDOneOrMore c -> check_child c | DTDChoice [] | DTDChildren [] -> raise (Check_error (ElementEmptyContructor tag)) | DTDChoice l | DTDChildren l -> List.iter check_child l in check_child x in check_type etype in List.iter check_item dtd; Hashtbl.iter (fun t from -> match from with | None -> raise (Check_error (ElementNotDeclared t)) | Some tag -> raise (Check_error (ElementReferenced (t,tag))) ) htodo; { c_elements = hdone; c_attribs = attribs; } let start_prove dtd root = let d = { elements = dtd.c_elements; attribs = dtd.c_attribs; state = Stack.create(); current = DTDChild (DTDTag root); curtag = "_root"; } in try ignore(Hashtbl.find d.elements (String.uppercase root)); d with Not_found -> raise (Check_error (ElementNotDeclared root)) (* - for debug only - *) let to_string_ref = ref (fun _ -> assert false) let trace dtd tag = let item = DTDElement ("current",dtd.current) in printf "%s : %s\n" (match tag with None -> "#PCDATA" | Some t -> t) (!to_string_ref item) exception TmpResult of dtd_result let prove_child dtd tag = trace dtd tag; match dtd.current with | DTDEmpty -> raise (Prove_error EmptyExpected) | DTDAny -> () | DTDChild elt -> let rec update = function | DTDTag s -> (match tag with | None -> DTDNotMatched | Some t when t = String.uppercase s -> DTDMatched | Some _ -> DTDNotMatched) | DTDPCData -> (match tag with | None -> DTDMatched | Some _ -> DTDNotMatched) | DTDOptional x -> (match update x with | DTDNotMatched | DTDNext -> DTDNext | DTDMatched | DTDMatchedResult _ -> DTDMatched) | DTDZeroOrMore x -> (match update x with | DTDNotMatched | DTDNext -> DTDNext | DTDMatched | DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x)) | DTDOneOrMore x -> (match update x with | DTDNotMatched | DTDNext -> DTDNotMatched | DTDMatched | DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x)) | DTDChoice l -> (try (match List.exists (fun x -> match update x with | DTDMatched -> true | DTDMatchedResult _ as r -> raise (TmpResult r) | DTDNext | DTDNotMatched -> false) l with | true -> DTDMatched | false -> DTDNotMatched) with TmpResult r -> r) | DTDChildren [] -> assert false (* DTD is checked ! *) | DTDChildren (h :: t) -> (match update h with | DTDNext -> (match t with | [] -> DTDNotMatched | _ -> update (DTDChildren t)) | DTDNotMatched -> DTDNotMatched | DTDMatchedResult r -> DTDMatchedResult (DTDChildren (r::t)) | DTDMatched -> match t with | [] -> DTDMatched | _ -> DTDMatchedResult (DTDChildren t)) in match update elt with | DTDNext | DTDNotMatched -> (match tag with | None -> raise (Prove_error UnexpectedPCData) | Some t -> raise (Prove_error (UnexpectedTag t))) | DTDMatched -> dtd.current <- DTDEmpty | DTDMatchedResult r -> dtd.current <- DTDChild r let is_nmtoken_char = function | 'A'..'Z' | 'a'..'z' | '0'..'9' | '.' | '-' | '_' | ':' -> true | _ -> false let prove_attrib dtd attr aname (atype,adef) accu = let aval = (try Some (List.assoc aname attr) with Not_found -> None) in (match atype, aval with | DTDCData, _ -> () | DTDNMToken, None -> () | DTDNMToken, Some v -> for i = 0 to String.length v - 1 do if not (is_nmtoken_char v.[i]) then raise (Prove_error (InvalidAttributeValue aname)); done | DTDEnum l, None -> () | DTDEnum l, Some v -> if not (List.exists ((=) v) l) then raise (Prove_error (InvalidAttributeValue aname))); match adef, aval with | DTDRequired, None -> raise (Prove_error (RequiredAttribute aname)) | DTDFixed v, Some av when v <> av -> raise (Prove_error (InvalidAttributeValue aname)) | DTDImplied, None -> accu | DTDFixed v , None | DTDDefault _, Some v | DTDDefault v, None | DTDRequired, Some v | DTDImplied, Some v | DTDFixed _, Some v -> (aname,v) :: accu let check_attrib ahash (aname,_) = try ignore(Hashtbl.find ahash aname); with Not_found -> raise (Prove_error (UnexpectedAttribute aname)) let rec do_prove dtd = function | PCData s -> prove_child dtd None; PCData s | Element (tag,attr,childs) -> let utag = String.uppercase tag in let uattr = List.map (fun (aname,aval) -> String.uppercase aname , aval) attr in prove_child dtd (Some utag); Stack.push (dtd.curtag,dtd.current) dtd.state; let elt = (try Hashtbl.find dtd.elements utag with Not_found -> raise (Prove_error (UnexpectedTag tag))) in let ahash = (try Hashtbl.find dtd.attribs utag with Not_found -> empty_hash) in dtd.curtag <- tag; dtd.current <- elt; List.iter (check_attrib ahash) uattr; let attr = Hashtbl.fold (prove_attrib dtd uattr) ahash [] in let childs = ref (List.map (do_prove dtd) childs) in (match dtd.current with | DTDAny | DTDEmpty -> () | DTDChild elt -> let name = ref "" in let rec check = function | DTDTag t -> name := t; false | DTDPCData when !childs = [] -> childs := [PCData ""]; true | DTDPCData -> name := "#PCDATA"; false | DTDOptional _ -> true | DTDZeroOrMore _ -> true | DTDOneOrMore e -> ignore(check e); false | DTDChoice l -> List.exists check l | DTDChildren l -> List.for_all check l in match check elt with | true -> () | false -> raise (Prove_error (ChildExpected !name))); let ctag, cur = Stack.pop dtd.state in dtd.curtag <- tag; dtd.current <- cur; Element (tag,attr,!childs) let prove dtd root xml = do_prove (start_prove dtd root) xml let parse_error_msg = function | InvalidDTDDecl -> "Invalid DOCTYPE declaration" | InvalidDTDElement -> "Invalid DTD element declaration" | InvalidDTDAttribute -> "Invalid DTD attribute declaration" | InvalidDTDTag -> "Invalid DTD tag" | DTDItemExpected -> "DTD item expected" let parse_error (msg,pos) = let pos = (Obj.magic pos : error_pos) in if pos.emin = pos.emax then sprintf "%s line %d character %d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start) else sprintf "%s line %d characters %d-%d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) let check_error = function | ElementDefinedTwice tag -> sprintf "Element '%s' defined twice" tag | AttributeDefinedTwice (tag,aname) -> sprintf "Attribute '%s' of element '%s' defined twice" aname tag | ElementEmptyContructor tag -> sprintf "Element '%s' has empty constructor" tag | ElementReferenced (tag,from) -> sprintf "Element '%s' referenced by '%s' is not declared" tag from | ElementNotDeclared tag -> sprintf "Element '%s' needed but is not declared" tag let prove_error = function | UnexpectedPCData -> "Unexpected PCData" | UnexpectedTag tag -> sprintf "Unexpected tag : '%s'" tag | UnexpectedAttribute att -> sprintf "Unexpected attribute : '%s'" att | InvalidAttributeValue att -> sprintf "Invalid attribute value for '%s'" att | RequiredAttribute att -> sprintf "Required attribute not found : '%s'" att | ChildExpected cname -> sprintf "Child expected : '%s'" cname | EmptyExpected -> "No more children expected" let to_string = function | DTDAttribute (tag,aname,atype,adef) -> let atype_to_string = function | DTDCData -> "CDATA" | DTDNMToken -> "NMTOKEN" | DTDEnum l -> sprintf "(%s)" (String.concat "|" l) in let adefault_to_string = function | DTDDefault s -> sprintf "\"%s\"" s | DTDRequired -> "#REQUIRED" | DTDImplied -> "#IMPLIED" | DTDFixed s -> sprintf "#FIXED \"%s\"" s in sprintf "" tag aname (atype_to_string atype) (adefault_to_string adef) | DTDElement (tag,etype) -> let rec echild_to_string = function | DTDTag s -> s | DTDPCData -> "#PCDATA" | DTDOptional c -> sprintf "%s?" (echild_to_string c) | DTDZeroOrMore c -> sprintf "%s*" (echild_to_string c) | DTDOneOrMore c -> sprintf "%s+" (echild_to_string c) | DTDChoice [c] -> echild_to_string c | DTDChoice l -> sprintf "(%s)" (String.concat "|" (List.map echild_to_string l)) | DTDChildren [c] -> echild_to_string c | DTDChildren l -> sprintf "(%s)" (String.concat "," (List.map echild_to_string l)) in let etype_to_string = function | DTDEmpty -> "EMPTY" | DTDAny -> "ANY" | DTDChild x -> let rec op_to_string = function | DTDOptional c -> sprintf "%s?" (op_to_string c) | DTDZeroOrMore c -> sprintf "%s*" (op_to_string c) | DTDOneOrMore c -> sprintf "%s+" (op_to_string c) | _ -> "" in let rec root = function | DTDOptional c | DTDZeroOrMore c | DTDOneOrMore c -> root c | DTDChoice [_] | DTDChildren [_] as x -> x, false | DTDChoice _ | DTDChildren _ as x -> x, true | x -> x, false in match root x with | r, true -> sprintf "%s%s" (echild_to_string r) (op_to_string x) | r, false -> sprintf "(%s%s)" (echild_to_string r) (op_to_string x) in sprintf "" tag (etype_to_string etype) ;; to_string_ref := to_stringocaml-xml-light-2.2/dtd.mli000064400000000000000000000127761167637302700156600ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) (** Xml Light DTD This module provide several functions to create, check, and use DTD to prove Xml documents : {ul {li using the DTD types, you can directly create your own DTD structure} {li the {!Dtd.check} function can then be used to check that all DTD states have been declared, that no attributes are declared twice, and so on.} {li the {!Dtd.prove} function can be used to check an {!Xml} data structure with a checked DTD. The function will return the expanded Xml document or raise an exception if the DTD proving fails.} } {i Note about ENTITIES:} While parsing Xml, PCDATA is always parsed and the Xml entities & > < ' " are replaced by their corresponding ASCII characters. For Xml attributes, theses can be put between either double or simple quotes, and the backslash character can be used to escape inner quotes. There is no support for CDATA Xml nodes or PCDATA attributes declarations in DTD, and no support for user-defined entities using the ENTITY DTD element. *) (** {6 The DTD Types} *) type dtd_child = | DTDTag of string | DTDPCData | DTDOptional of dtd_child | DTDZeroOrMore of dtd_child | DTDOneOrMore of dtd_child | DTDChoice of dtd_child list | DTDChildren of dtd_child list type dtd_element_type = | DTDEmpty | DTDAny | DTDChild of dtd_child type dtd_attr_default = | DTDDefault of string | DTDRequired | DTDImplied | DTDFixed of string type dtd_attr_type = | DTDCData | DTDNMToken | DTDEnum of string list type dtd_item = | DTDAttribute of string * string * dtd_attr_type * dtd_attr_default | DTDElement of string * dtd_element_type type dtd = dtd_item list type checked (** {6 The DTD Functions} *) (** Parse the named file into a Dtd data structure. Raise {!Xml.File_not_found} if an error occured while opening the file. Raise {!Dtd.Parse_error} if parsing failed. *) val parse_file : string -> dtd (** Read the content of the in_channel and parse it into a Dtd data structure. Raise {!Dtd.Parse_error} if parsing failed. *) val parse_in : in_channel -> dtd (** Parse the string containing a Dtd document into a Dtd data structure. Raise {!Dtd.Parse_error} if parsing failed. *) val parse_string : string -> dtd (** Check the Dtd data structure declaration and return a checked DTD. Raise {!Dtd.Check_error} if the DTD checking failed. *) val check : dtd -> checked (** Prove an Xml document using a checked DTD and an entry point. The entry point is the first excepted tag of the Xml document, the returned Xml document has the same structure has the original one, excepted that non declared optional attributes have been set to their default value specified in the DTD. Raise {!Dtd.Check_error} [ElementNotDeclared] if the entry point is not found, raise {!Dtd.Prove_error} if the Xml document failed to be proved with the DTD. *) val prove : checked -> string -> Xml.xml -> Xml.xml (** Print a DTD element into a string. You can easily get a DTD document from a DTD data structure using for example [String.concat "\n" (List.map Dtd.to_string) my_dtd] *) val to_string : dtd_item -> string (** {6 The DTD Exceptions} *) (** There is three types of DTD excecptions : {ul {li {!Dtd.Parse_error} is raised when an error occured while parsing a DTD document into a DTD data structure.} {li {!Dtd.Check_error} is raised when an error occured while checking a DTD data structure for completeness, or when the prove entry point is not found when calling {!Dtd.prove}.} {li {!Dtd.Prove_error} is raised when an error occured while proving an Xml document.} } Several string conversion functions are provided to enable you to report errors to the user. *) type parse_error_msg = | InvalidDTDDecl | InvalidDTDElement | InvalidDTDAttribute | InvalidDTDTag | DTDItemExpected type check_error = | ElementDefinedTwice of string | AttributeDefinedTwice of string * string | ElementEmptyContructor of string | ElementReferenced of string * string | ElementNotDeclared of string type prove_error = | UnexpectedPCData | UnexpectedTag of string | UnexpectedAttribute of string | InvalidAttributeValue of string | RequiredAttribute of string | ChildExpected of string | EmptyExpected type parse_error = parse_error_msg * Xml.error_pos exception Parse_error of parse_error exception Check_error of check_error exception Prove_error of prove_error val parse_error : parse_error -> string val check_error : check_error -> string val prove_error : prove_error -> string (**/**) (* internal usage only... *) val _raises : (string -> exn) -> unitocaml-xml-light-2.2/test.ml000064400000000000000000000035311167637302700157000ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) open Xml open Dtd let parse data = match data.[0] with | '#' -> Xml.parse_file (String.sub data 1 ((String.length data)-2)) | _ -> Xml.parse_string data ;; let buf = ref "" in print_endline "Please enter some XML data followed (press return twice to parse) :"; try while true do match read_line() with | "" when !buf <> "" -> let data = !buf in buf := ""; (try let x = parse data in print_endline "Parsing..."; print_endline (Xml.to_string_fmt x); with | Xml.Error msg as e -> Printf.printf "Xml error : %s\n" (Xml.error msg) | Dtd.Parse_error msg as e -> Printf.printf "Dtd parse error : %s\n" (Dtd.parse_error msg) | Dtd.Check_error msg as e -> Printf.printf "Dtd check error : %s\n" (Dtd.check_error msg) | Dtd.Prove_error msg as e -> Printf.printf "Dtd prove error : %s\n" (Dtd.prove_error msg)) | s -> buf := !buf ^ s ^ "\n" done with End_of_file -> print_endline "Exit."ocaml-xml-light-2.2/xml.ml000064400000000000000000000162341167637302700155250ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) open Printf type xml = | Element of (string * (string * string) list * xml list) | PCData of string type error_pos = { eline : int; eline_start : int; emin : int; emax : int; } type error_msg = | UnterminatedComment | UnterminatedString | UnterminatedEntity | IdentExpected | CloseExpected | NodeExpected | AttributeNameExpected | AttributeValueExpected | EndOfTagExpected of string | EOFExpected type error = error_msg * error_pos exception Error of error exception File_not_found of string exception Not_element of xml exception Not_pcdata of xml exception No_attribute of string let default_parser = XmlParser.make() let pos source = let line, lstart, min, max = Xml_lexer.pos source in { eline = line; eline_start = lstart; emin = min; emax = max; } let parse (p:XmlParser.t) (source:XmlParser.source) = (* local cast Xml.xml -> xml *) (Obj.magic XmlParser.parse p source : xml) let parse_in ch = parse default_parser (XmlParser.SChannel ch) let parse_string str = parse default_parser (XmlParser.SString str) let parse_file f = let p = XmlParser.make() in let path = Filename.dirname f in XmlParser.resolve p (fun file -> let name = (match path with "." -> file | _ -> path ^ "/" ^ file) in Dtd.check (Dtd.parse_file name) ); parse p (XmlParser.SFile f) let error_msg = function | UnterminatedComment -> "Unterminated comment" | UnterminatedString -> "Unterminated string" | UnterminatedEntity -> "Unterminated entity" | IdentExpected -> "Ident expected" | CloseExpected -> "Element close expected" | NodeExpected -> "Xml node expected" | AttributeNameExpected -> "Attribute name expected" | AttributeValueExpected -> "Attribute value expected" | EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag | EOFExpected -> "End of file expected" let error (msg,pos) = if pos.emin = pos.emax then sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) else sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start) let line e = e.eline let range e = e.emin - e.eline_start , e.emax - e.eline_start let abs_range e = e.emin , e.emax let tag = function | Element (tag,_,_) -> tag | x -> raise (Not_element x) let pcdata = function | PCData text -> text | x -> raise (Not_pcdata x) let attribs = function | Element (_,attr,_) -> attr | x -> raise (Not_element x) let attrib x att = match x with | Element (_,attr,_) -> (try let att = String.lowercase att in snd (List.find (fun (n,_) -> String.lowercase n = att) attr) with Not_found -> raise (No_attribute att)) | x -> raise (Not_element x) let children = function | Element (_,_,clist) -> clist | x -> raise (Not_element x) (*let enum = function | Element (_,_,clist) -> List.to_enum clist | x -> raise (Not_element x) *) let iter f = function | Element (_,_,clist) -> List.iter f clist | x -> raise (Not_element x) let map f = function | Element (_,_,clist) -> List.map f clist | x -> raise (Not_element x) let fold f v = function | Element (_,_,clist) -> List.fold_left f v clist | x -> raise (Not_element x) let tmp = Buffer.create 200 let buffer_pcdata text = let l = String.length text in for p = 0 to l-1 do match text.[p] with | '>' -> Buffer.add_string tmp ">" | '<' -> Buffer.add_string tmp "<" | '&' -> if p < l-1 && text.[p+1] = '#' then Buffer.add_char tmp '&' else Buffer.add_string tmp "&" | '\'' -> Buffer.add_string tmp "'" | '"' -> Buffer.add_string tmp """ | c -> Buffer.add_char tmp c done let buffer_attr (n,v) = Buffer.add_char tmp ' '; Buffer.add_string tmp n; Buffer.add_string tmp "=\""; let l = String.length v in for p = 0 to l-1 do match v.[p] with | '\\' -> Buffer.add_string tmp "\\\\" | '"' -> Buffer.add_string tmp "\\\"" | c -> Buffer.add_char tmp c done; Buffer.add_char tmp '"' let to_string x = let pcdata = ref false in let rec loop = function | Element (tag,alist,[]) -> Buffer.add_char tmp '<'; Buffer.add_string tmp tag; List.iter buffer_attr alist; Buffer.add_string tmp "/>"; pcdata := false; | Element (tag,alist,l) -> Buffer.add_char tmp '<'; Buffer.add_string tmp tag; List.iter buffer_attr alist; Buffer.add_char tmp '>'; pcdata := false; List.iter loop l; Buffer.add_string tmp "'; pcdata := false; | PCData text -> if !pcdata then Buffer.add_char tmp ' '; buffer_pcdata text; pcdata := true; in Buffer.reset tmp; loop x; let s = Buffer.contents tmp in Buffer.reset tmp; s let to_string_fmt x = let rec loop ?(newl=false) tab = function | Element (tag,alist,[]) -> Buffer.add_string tmp tab; Buffer.add_char tmp '<'; Buffer.add_string tmp tag; List.iter buffer_attr alist; Buffer.add_string tmp "/>"; if newl then Buffer.add_char tmp '\n'; | Element (tag,alist,[PCData text]) -> Buffer.add_string tmp tab; Buffer.add_char tmp '<'; Buffer.add_string tmp tag; List.iter buffer_attr alist; Buffer.add_string tmp ">"; buffer_pcdata text; Buffer.add_string tmp "'; if newl then Buffer.add_char tmp '\n'; | Element (tag,alist,l) -> Buffer.add_string tmp tab; Buffer.add_char tmp '<'; Buffer.add_string tmp tag; List.iter buffer_attr alist; Buffer.add_string tmp ">\n"; List.iter (loop ~newl:true (tab^" ")) l; Buffer.add_string tmp tab; Buffer.add_string tmp "'; if newl then Buffer.add_char tmp '\n'; | PCData text -> buffer_pcdata text; if newl then Buffer.add_char tmp '\n'; in Buffer.reset tmp; loop "" x; let s = Buffer.contents tmp in Buffer.reset tmp; s ;; XmlParser._raises (fun x p -> (* local cast : Xml.error_msg -> error_msg *) Error ((Obj.magic x : error_msg),pos p)) (fun f -> File_not_found f) (fun x p -> Dtd.Parse_error (x, (* local cast : Xml.error_pos -> error_pos *) (Obj.magic (pos p)))); Dtd._raises (fun f -> File_not_found f);ocaml-xml-light-2.2/xml.mli000064400000000000000000000134041167637302700156720ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) (** Xml Light Xml Light is a minimal Xml parser & printer for OCaml. It provide few functions to parse a basic Xml document into an OCaml data structure and to print back the data structures to an Xml document. Xml Light has also support for {b DTD} (Document Type Definition). {i (c)Copyright 2002-2003 Nicolas Cannasse} *) (** {6 Xml Data Structure} *) (** An Xml node is either [Element (tag-name, attributes, children)] or [PCData text] *) type xml = | Element of (string * (string * string) list * xml list) | PCData of string (** {6 Xml Parsing} *) (** For easily parsing an Xml data source into an xml data structure, you can use theses functions. But if you want advanced parsing usage, please look at the {!XmlParser} module. All the parsing functions can raise some exceptions, see the {{:#exc}Exceptions} section for more informations. *) (** Parse the named file into an Xml data structure. *) val parse_file : string -> xml (** Read the content of the in_channel and parse it into an Xml data structure. *) val parse_in : in_channel -> xml (** Parse the string containing an Xml document into an Xml data structure. *) val parse_string : string -> xml (** {6:exc Xml Exceptions} *) (** Several exceptions can be raised when parsing an Xml document : {ul {li {!Xml.Error} is raised when an xml parsing error occurs. the {!Xml.error_msg} tells you which error occured during parsing and the {!Xml.error_pos} can be used to retreive the document location where the error occured at.} {li {!Xml.File_not_found} is raised when and error occured while opening a file with the {!Xml.parse_file} function or when a DTD file declared by the Xml document is not found {i (see the {!XmlParser} module for more informations on how to handle the DTD file loading)}.} } If the Xml document is containing a DTD, then some other exceptions can be raised, see the module {!Dtd} for more informations. *) type error_pos type error_msg = | UnterminatedComment | UnterminatedString | UnterminatedEntity | IdentExpected | CloseExpected | NodeExpected | AttributeNameExpected | AttributeValueExpected | EndOfTagExpected of string | EOFExpected type error = error_msg * error_pos exception Error of error exception File_not_found of string (** Get a full error message from an Xml error. *) val error : error -> string (** Get the Xml error message as a string. *) val error_msg : error_msg -> string (** Get the line the error occured at. *) val line : error_pos -> int (** Get the relative character range (in current line) the error occured at.*) val range : error_pos -> int * int (** Get the absolute character range the error occured at. *) val abs_range : error_pos -> int * int (** {6 Xml Functions} *) exception Not_element of xml exception Not_pcdata of xml exception No_attribute of string (** [tag xdata] returns the tag value of the xml node. Raise {!Xml.Not_element} if the xml is not an element *) val tag : xml -> string (** [pcdata xdata] returns the PCData value of the xml node. Raise {!Xml.Not_pcdata} if the xml is not a PCData *) val pcdata : xml -> string (** [attribs xdata] returns the attribute list of the xml node. First string if the attribute name, second string is attribute value. Raise {!Xml.Not_element} if the xml is not an element *) val attribs : xml -> (string * string) list (** [attrib xdata "href"] returns the value of the ["href"] attribute of the xml node (attribute matching is case-insensitive). Raise {!Xml.No_attribute} if the attribute does not exists in the node's attribute list Raise {!Xml.Not_element} if the xml is not an element *) val attrib : xml -> string -> string (** [children xdata] returns the children list of the xml node Raise {!Xml.Not_element} if the xml is not an element *) val children : xml -> xml list (*** [enum xdata] returns the children enumeration of the xml node Raise {!Xml.Not_element} if the xml is not an element *) (* val enum : xml -> xml Enum.t *) (** [iter f xdata] calls f on all children of the xml node. Raise {!Xml.Not_element} if the xml is not an element *) val iter : (xml -> unit) -> xml -> unit (** [map f xdata] is equivalent to [List.map f (Xml.children xdata)] Raise {!Xml.Not_element} if the xml is not an element *) val map : (xml -> 'a) -> xml -> 'a list (** [fold f init xdata] is equivalent to [List.fold_left f init (Xml.children xdata)] Raise {!Xml.Not_element} if the xml is not an element *) val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a (** {6 Xml Printing} *) (** Print the xml data structure into a compact xml string (without any user-readable formating ). *) val to_string : xml -> string (** Print the xml data structure into an user-readable string with tabs and lines break between different nodes. *) val to_string_fmt : xml -> string ocaml-xml-light-2.2/xmlParser.ml000064400000000000000000000121431167637302700166750ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * Copyright (C) 2003 Jacques Garrigue * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) open Printf open Dtd open Xml type t = { mutable prove : bool; mutable check_eof : bool; mutable concat_pcdata : bool; mutable resolve : (string -> checked); } type source = | SFile of string | SChannel of in_channel | SString of string | SLexbuf of Lexing.lexbuf type state = { source : Lexing.lexbuf; stack : Xml_lexer.token Stack.t; xparser : t; } exception Internal_error of Xml.error_msg exception NoMoreData let xml_error = ref (fun _ -> assert false) let dtd_error = ref (fun _ -> assert false) let file_not_found = ref (fun _ -> assert false) let _raises e f d = xml_error := e; file_not_found := f; dtd_error := d let make () = { prove = true; check_eof = true; concat_pcdata = true; resolve = (fun file -> raise (!file_not_found file)) } let prove p v = p.prove <- v let resolve p f = p.resolve <- f let check_eof p v = p.check_eof <- v let concat_pcdata p v = p.concat_pcdata <- v let pop s = try Stack.pop s.stack with Stack.Empty -> Xml_lexer.token s.source let push t s = Stack.push t s.stack let rec read_node s = match pop s with | Xml_lexer.PCData s -> PCData s | Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, []) | Xml_lexer.Tag (tag, attr, false) -> Element (tag, attr, read_elems ~tag s) | t -> push t s; raise NoMoreData and read_elems ?tag s = let elems = ref [] in (try while true do match s.xparser.concat_pcdata , read_node s , !elems with | true , PCData c , (PCData c2) :: q -> elems := PCData (sprintf "%s\n%s" c2 c) :: q | _ , x , l -> elems := x :: l done with NoMoreData -> ()); match pop s with | Xml_lexer.Endtag s when Some s = tag -> List.rev !elems | Xml_lexer.Eof when tag = None -> List.rev !elems | t -> match tag with | None -> raise (Internal_error EOFExpected) | Some s -> raise (Internal_error (EndOfTagExpected s)) let read_xml s = match s.xparser.prove, pop s with | true, Xml_lexer.DocType (root, Xml_lexer.DTDFile file) -> let pos = Xml_lexer.pos s.source in let dtd = s.xparser.resolve file in Xml_lexer.restore pos; let x = read_node s in Dtd.prove dtd root x | true, Xml_lexer.DocType (root, Xml_lexer.DTDData dtd) -> let dtd = Dtd.check dtd in let x = read_node s in Dtd.prove dtd root x | false, Xml_lexer.DocType _ -> read_node s | _, t -> push t s; read_node s let convert = function | Xml_lexer.EUnterminatedComment -> UnterminatedComment | Xml_lexer.EUnterminatedString -> UnterminatedString | Xml_lexer.EIdentExpected -> IdentExpected | Xml_lexer.ECloseExpected -> CloseExpected | Xml_lexer.ENodeExpected -> NodeExpected | Xml_lexer.EAttributeNameExpected -> AttributeNameExpected | Xml_lexer.EAttributeValueExpected -> AttributeValueExpected | Xml_lexer.EUnterminatedEntity -> UnterminatedEntity let dtd_convert = function | Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl | Xml_lexer.EInvalidDTDTag -> InvalidDTDTag | Xml_lexer.EDTDItemExpected -> DTDItemExpected | Xml_lexer.EInvalidDTDElement -> InvalidDTDElement | Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute let do_parse xparser source = try Xml_lexer.init source; let s = { source = source; xparser = xparser; stack = Stack.create(); } in let x = read_xml s in if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected); Xml_lexer.close source; x with | NoMoreData -> Xml_lexer.close source; raise (!xml_error NodeExpected source) | Internal_error e -> Xml_lexer.close source; raise (!xml_error e source) | Xml_lexer.Error e -> Xml_lexer.close source; raise (!xml_error (convert e) source) | Xml_lexer.DTDError e -> Xml_lexer.close source; raise (!dtd_error (dtd_convert e) source) let parse p = function | SChannel ch -> do_parse p (Lexing.from_channel ch) | SString str -> do_parse p (Lexing.from_string str) | SLexbuf lex -> do_parse p lex | SFile fname -> let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in try let x = do_parse p (Lexing.from_channel ch) in close_in ch; x with e -> close_in ch; raise e ocaml-xml-light-2.2/xmlParser.mli000064400000000000000000000066671167637302700170640ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) (** Xml Light Parser While basic parsing functions can be used in the {!Xml} module, this module is providing a way to create, configure and run an Xml parser. *) (** Abstract type for an Xml parser. *) type t (** Several kind of resources can contain Xml documents. *) type source = | SFile of string | SChannel of in_channel | SString of string | SLexbuf of Lexing.lexbuf (** This function returns a new parser with default options. *) val make : unit -> t (** This function enable or disable automatic DTD proving with the parser. Note that Xml documents having no reference to a DTD are never proved when parsed (but you can prove them later using the {!Dtd} module {i (by default, prove is true)}. *) val prove : t -> bool -> unit (** When parsing an Xml document from a file using the {!Xml.parse_file} function, the DTD file if declared by the Xml document has to be in the same directory as the xml file. When using other parsing functions, such as on a string or on a channel, the parser will raise everytime {!Xml.File_not_found} if a DTD file is needed and prove enabled. To enable the DTD loading of the file, the user have to configure the Xml parser with a [resolve] function which is taking as argument the DTD filename and is returning a checked DTD. The user can then implement any kind of DTD loading strategy, and can use the {!Dtd} module functions to parse and check the DTD file {i (by default, the resolve function is raising} {!Xml.File_not_found}). *) val resolve : t -> (string -> Dtd.checked) -> unit (** When a Xml document is parsed, the parser will check that the end of the document is reached, so for example parsing [""] will fail instead of returning only the A element. You can turn off this check by setting [check_eof] to [false] {i (by default, check_eof is true)}. *) val check_eof : t -> bool -> unit (** Once the parser is configurated, you can run the parser on a any kind of xml document source to parse its contents into an Xml data structure. *) val parse : t -> source -> Xml.xml (** When several PCData elements are separed by a \n (or \r\n), you can either split the PCData in two distincts PCData or merge them with \n as seperator into one PCData. The default behavior is to concat the PCData, but this can be changed for a given parser with this flag. *) val concat_pcdata : t -> bool -> unit (**/**) (* internal usage only... *) val _raises : (Xml.error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> (Dtd.parse_error_msg -> Lexing.lexbuf -> exn) -> unitocaml-xml-light-2.2/xml_lexer.mli000064400000000000000000000033041167637302700170670ustar00rootroot00000000000000(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) type error = | EUnterminatedComment | EUnterminatedString | EIdentExpected | ECloseExpected | ENodeExpected | EAttributeNameExpected | EAttributeValueExpected | EUnterminatedEntity type dtd_error = | EInvalidDTDDecl | EInvalidDTDTag | EDTDItemExpected | EInvalidDTDElement | EInvalidDTDAttribute exception Error of error exception DTDError of dtd_error type dtd_decl = | DTDFile of string | DTDData of Dtd.dtd type token = | Tag of string * (string * string) list * bool | PCData of string | Endtag of string | DocType of (string * dtd_decl) | Eof type pos = int * int * int * int val init : Lexing.lexbuf -> unit val close : Lexing.lexbuf -> unit val token : Lexing.lexbuf -> token val dtd : Lexing.lexbuf -> Dtd.dtd val pos : Lexing.lexbuf -> pos val restore : pos -> unitocaml-xml-light-2.2/xml_lexer.mll000064400000000000000000000300651167637302700170760ustar00rootroot00000000000000{(* * Xml Light, an small Xml parser/printer with DTD support. * Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) open Lexing open Xml_parser open Dtd type error = | EUnterminatedComment | EUnterminatedString | EIdentExpected | ECloseExpected | ENodeExpected | EAttributeNameExpected | EAttributeValueExpected | EUnterminatedEntity type dtd_error = | EInvalidDTDDecl | EInvalidDTDTag | EDTDItemExpected | EInvalidDTDElement | EInvalidDTDAttribute exception Error of error exception DTDError of dtd_error type pos = int * int * int * int type dtd_decl = | DTDFile of string | DTDData of dtd type dtd_item_type = | TElement | TAttribute type token = | Tag of string * (string * string) list * bool | PCData of string | Endtag of string | DocType of (string * dtd_decl) | Eof let last_pos = ref 0 and current_line = ref 0 and current_line_start = ref 0 let tmp = Buffer.create 200 let idents = Hashtbl.create 0 let _ = begin Hashtbl.add idents "gt;" ">"; Hashtbl.add idents "lt;" "<"; Hashtbl.add idents "amp;" "&"; Hashtbl.add idents "apos;" "'"; Hashtbl.add idents "quot;" "\""; end let init lexbuf = current_line := 1; current_line_start := lexeme_start lexbuf; last_pos := !current_line_start let close lexbuf = Buffer.reset tmp let pos lexbuf = !current_line , !current_line_start , !last_pos , lexeme_start lexbuf let restore (cl,cls,lp,_) = current_line := cl; current_line_start := cls; last_pos := lp let newline lexbuf = incr current_line; last_pos := lexeme_end lexbuf; current_line_start := !last_pos let error lexbuf e = last_pos := lexeme_start lexbuf; raise (Error e) let dtd_error lexbuf e = last_pos := lexeme_start lexbuf; raise (DTDError e) } let newline = ['\n'] let break = ['\r'] let space = [' ' '\t'] let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-'] let entitychar = ['A'-'Z' 'a'-'z'] let pcchar = [^ '\r' '\n' '<' '>' '&'] rule token = parse | newline { newline lexbuf; token lexbuf } | (space | break) + { last_pos := lexeme_end lexbuf; token lexbuf } | "" { () } | eof { raise (Error EUnterminatedComment) } | _ { comment lexbuf } and header = parse | newline { newline lexbuf; header lexbuf } | "?>" { () } | eof { error lexbuf ECloseExpected } | _ { header lexbuf } and pcdata = parse | pcchar+ { Buffer.add_string tmp (lexeme lexbuf); pcdata lexbuf } | "&#" { Buffer.add_string tmp (lexeme lexbuf); pcdata lexbuf; } | '&' { Buffer.add_string tmp (entity lexbuf); pcdata lexbuf } | "" { Buffer.contents tmp } and entity = parse | entitychar+ ';' { let ident = lexeme lexbuf in try Hashtbl.find idents (String.lowercase ident) with Not_found -> "&" ^ ident } | _ | eof { raise (Error EUnterminatedEntity) } and ident_name = parse | identchar+ { lexeme lexbuf } | _ | eof { error lexbuf EIdentExpected } and close_tag = parse | '>' { () } | _ | eof { error lexbuf ECloseExpected } and attributes = parse | '>' { [], false } | "/>" { [], true } | "" (* do not read a char ! *) { let key = attribute lexbuf in let data = attribute_data lexbuf in ignore_spaces lexbuf; let others, closed = attributes lexbuf in (key, data) :: others, closed } and attribute = parse | identchar+ { lexeme lexbuf } | _ | eof { error lexbuf EAttributeNameExpected } and attribute_data = parse | space* '=' space* '"' { Buffer.reset tmp; last_pos := lexeme_end lexbuf; dq_string lexbuf } | space* '=' space* '\'' { Buffer.reset tmp; last_pos := lexeme_end lexbuf; q_string lexbuf } | _ | eof { error lexbuf EAttributeValueExpected } and dq_string = parse | '"' { Buffer.contents tmp } | '\\' [ '"' '\\' ] { Buffer.add_char tmp (lexeme_char lexbuf 1); dq_string lexbuf } | eof { raise (Error EUnterminatedString) } | _ { Buffer.add_char tmp (lexeme_char lexbuf 0); dq_string lexbuf } and q_string = parse | '\'' { Buffer.contents tmp } | '\\' [ '\'' '\\' ] { Buffer.add_char tmp (lexeme_char lexbuf 1); q_string lexbuf } | eof { raise (Error EUnterminatedString) } | _ { Buffer.add_char tmp (lexeme_char lexbuf 0); q_string lexbuf } and dtd_data = parse | "PUBLIC" { ignore_spaces lexbuf; (* skipping Public ID *) let _ = dtd_file lexbuf in let file = dtd_file lexbuf in dtd_end_decl lexbuf; DTDFile file } | "SYSTEM" { ignore_spaces lexbuf; let file = dtd_file lexbuf in dtd_end_decl lexbuf; DTDFile file } | '[' { ignore_spaces lexbuf; let data = dtd_intern lexbuf in dtd_end_decl lexbuf; DTDData data } | _ | eof { dtd_error lexbuf EInvalidDTDDecl } and dtd_file = parse | '"' { Buffer.reset tmp; let s = dq_string lexbuf in ignore_spaces lexbuf; s } | '\'' { Buffer.reset tmp; let s = q_string lexbuf in ignore_spaces lexbuf; s } | _ | eof { dtd_error lexbuf EInvalidDTDDecl } and dtd_intern = parse | ']' { ignore_spaces lexbuf; [] } | "" { let l = dtd_item lexbuf in l @ (dtd_intern lexbuf) } and dtd = parse | eof { [] } | newline { newline lexbuf; dtd lexbuf } | (space | break)+ { dtd lexbuf } | "" { let l = dtd_item lexbuf in l @ (dtd lexbuf) } and dtd_end_decl = parse | '>' { ignore_spaces lexbuf } | _ | eof { dtd_error lexbuf EInvalidDTDDecl } and dtd_item = parse | "