ocaml-xml-light-2.2/ 0000755 0000000 0000000 00000000000 11676373027 0014365 5 ustar 00root root 0000000 0000000 ocaml-xml-light-2.2/Makefile 0000644 0000000 0000000 00000005444 11676373027 0016034 0 ustar 00root root 0000000 0000000 # Makefile generated by OCamake
# http://tech.motion-twin.com
.SUFFIXES : .ml .mli .cmo .cmx .cmi .mll .mly
INSTALLDIR=`ocamlc -where`
CFLAGS=
LFLAGS= -a
LIBS=
all: xml-light.cma test.exe doc
opt: xml-light.cmxa test_opt.exe
install: all opt
cp xml-light.cmxa xml-light.a xml-light.cma xml.mli xmlParser.mli dtd.mli xml.cmi xmlParser.cmi dtd.cmi xml.cmx dtd.cmx xmlParser.cmx $(INSTALLDIR)
doc:
mkdir doc
ocamldoc -sort -html -d doc xml.mli dtd.mli xmlParser.mli
test.exe: xml-light.cma
ocamlc xml-light.cma test.ml -o test.exe
test_opt.exe: xml-light.cmxa
ocamlopt xml-light.cmxa test.ml -o test_opt.exe
xml-light.cma: xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo
ocamlc -o xml-light.cma $(LFLAGS) $(LIBS) xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo
xml-light.cmxa: xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx
ocamlopt -o xml-light.cmxa $(LFLAGS) $(LIBS) xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx
dtd.cmo: xml.cmi xml_lexer.cmi dtd.cmi
dtd.cmx: xml.cmi xml_lexer.cmi dtd.cmi
xml.cmo: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi
xml.cmx: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi
xmlParser.cmo: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi
xmlParser.cmx: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi
dtd.cmi: xml.cmi
xml.cmi:
xmlParser.cmi: dtd.cmi xml.cmi
xml_lexer.cmi: dtd.cmi
xml_parser.cmo: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi
xml_parser.cmx: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi
xml_lexer.cmo: xml_lexer.ml xml_lexer.cmi
xml_lexer.cmx: xml_lexer.ml xml_lexer.cmi
clean:
rm -f xml-light.cma test.exe dtd.cmo dtd.cmi test.cmo test.cmi xml.cmo xml.cmi xmlParser.cmo xmlParser.cmi dtd.cmi xml.cmi xmlParser.cmi xml_lexer.cmi xml_lexer.cmo xml_lexer.ml xml_parser.mli xml_parser.cmi xml_parser.ml xml_parser.cmo
rm -f xml_light.lib xml_light.a xml-light.cmxa test_opt.exe dtd.cmx dtd.obj dtd.o test.cmx test.obj test.o xml.cmx xml.obj xml.o xmlParser.cmx xmlParser.obj xmlParser.o xml_lexer.cmx xml_lexer.obj xml_lexer.o xml_parser.cmx xml_parser.obj xml_parser.o
wclean:
-@del xml-light.cma test.exe dtd.cmo dtd.cmi test.cmo test.cmi xml.cmo xml.cmi xmlParser.cmo xmlParser.cmi dtd.cmi xml.cmi xmlParser.cmi xml_lexer.cmi xml_lexer.cmo xml_lexer.ml xml_parser.mli xml_parser.cmi xml_parser.ml xml_parser.cmo 2>NUL
-@del xml_light.lib xml_light.a xml-light.cmxa test_opt.exe dtd.cmx dtd.obj dtd.o test.cmx test.obj test.o xml.cmx xml.obj xml.o xmlParser.cmx xmlParser.obj xmlParser.o xml_lexer.cmx xml_lexer.obj xml_lexer.o xml_parser.cmx xml_parser.obj xml_parser.o 2>NUL
# SUFFIXES
.ml.cmo:
ocamlc $(CFLAGS) -c $<
.ml.cmx:
ocamlopt $(CFLAGS) -c $<
.mli.cmi:
ocamlc $(CFLAGS) $<
.mll.ml:
ocamllex $<
.mly.ml:
ocamlyacc $<
ocaml-xml-light-2.2/README 0000644 0000000 0000000 00000002760 11676373027 0015252 0 ustar 00root root 0000000 0000000 Xml-Light Version 2.2 :
-----------------------
Last version : http://tech.motion-twin.com
Xml Light is a minimal Xml parser & printer for OCaml.
It provide few functions to parse a basic Xml document into
an OCaml data structure and to print back the data structures
to an Xml document.
Xml Light has also support for DTD (Document Type Definition).
Install
-------
make install
by default, Xml Light is installed in the 'ocamlc -where' directory.
you can change it by editing the Makefile.
for Windows users, if you're using the MSVC version of ocaml and
don't have cygwin tools installed, you can do : nmake all
and then copy manually the files to the place you want.
Usage
-----
simple samples :
-- parse / print an xml string ---
let x = Xml.parse_string "TEXT" in
Printf.printf "XML formated = \n%s" (Xml.to_string_fmt x);
-- load an xml and a dtd , prove and print ---
let x = Xml.parse_file "myfile.xml" in
let dtd = Dtd.parse_file "myfile.dtd" in
let x = Dtd.prove (Dtd.check dtd) "start" x in
print_endline (Xml.to_string x)
Documentation
-------------
HTML documentation can be generated with ocamldoc :
make doc
you can also directly browse the MLI files to read it.
Licence
-------
Xml Light is LGPL
Credits
-------
(c)2003-2005 Nicolas Cannasse (ncannasse@motion-twin.com)
(c)2003-2005 Motion-Twin
Some parts of this code source has an additionnal copyright to Jacques Garrigue
ocaml-xml-light-2.2/dtd.ml 0000644 0000000 0000000 00000035254 11676373027 0015503 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
open Xml
open Printf
type parse_error_msg =
| InvalidDTDDecl
| InvalidDTDElement
| InvalidDTDAttribute
| InvalidDTDTag
| DTDItemExpected
type check_error =
| ElementDefinedTwice of string
| AttributeDefinedTwice of string * string
| ElementEmptyContructor of string
| ElementReferenced of string * string
| ElementNotDeclared of string
type prove_error =
| UnexpectedPCData
| UnexpectedTag of string
| UnexpectedAttribute of string
| InvalidAttributeValue of string
| RequiredAttribute of string
| ChildExpected of string
| EmptyExpected
type dtd_child =
| DTDTag of string
| DTDPCData
| DTDOptional of dtd_child
| DTDZeroOrMore of dtd_child
| DTDOneOrMore of dtd_child
| DTDChoice of dtd_child list
| DTDChildren of dtd_child list
type dtd_element_type =
| DTDEmpty
| DTDAny
| DTDChild of dtd_child
type dtd_attr_default =
| DTDDefault of string
| DTDRequired
| DTDImplied
| DTDFixed of string
type dtd_attr_type =
| DTDCData
| DTDNMToken
| DTDEnum of string list
type dtd_item =
| DTDAttribute of string * string * dtd_attr_type * dtd_attr_default
| DTDElement of string * dtd_element_type
type dtd_result =
| DTDNext
| DTDNotMatched
| DTDMatched
| DTDMatchedResult of dtd_child
type error_pos = {
eline : int;
eline_start : int;
emin : int;
emax : int;
}
type parse_error = parse_error_msg * Xml.error_pos
exception Parse_error of parse_error
exception Check_error of check_error
exception Prove_error of prove_error
type dtd = dtd_item list
type ('a,'b) hash = ('a,'b) Hashtbl.t
type checked = {
c_elements : (string,dtd_element_type) hash;
c_attribs : (string,(string,(dtd_attr_type * dtd_attr_default)) hash) hash;
}
type dtd_state = {
elements : (string,dtd_element_type) hash;
attribs : (string,(string,(dtd_attr_type * dtd_attr_default)) hash) hash;
mutable current : dtd_element_type;
mutable curtag : string;
state : (string * dtd_element_type) Stack.t;
}
let file_not_found = ref (fun _ -> assert false)
let _raises e =
file_not_found := e
let empty_hash = Hashtbl.create 0
let pos source =
let line, lstart, min, max = Xml_lexer.pos source in
(Obj.magic {
eline = line;
eline_start = lstart;
emin = min;
emax = max;
} : Xml.error_pos)
let convert = function
| Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl
| Xml_lexer.EInvalidDTDElement -> InvalidDTDElement
| Xml_lexer.EInvalidDTDTag -> InvalidDTDTag
| Xml_lexer.EDTDItemExpected -> DTDItemExpected
| Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute
let parse source =
try
Xml_lexer.init source;
(* local cast Dtd.dtd -> dtd *)
let dtd = (Obj.magic Xml_lexer.dtd source : dtd) in
Xml_lexer.close source;
dtd
with
| Xml_lexer.DTDError e ->
Xml_lexer.close source;
raise (Parse_error (convert e,pos source))
let parse_string s = parse (Lexing.from_string s)
let parse_in ch = parse (Lexing.from_channel ch)
let parse_file fname =
let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
try
let x = parse (Lexing.from_channel ch) in
close_in ch;
x
with
e ->
close_in ch;
raise e
let check dtd =
let attribs = Hashtbl.create 0 in
let hdone = Hashtbl.create 0 in
let htodo = Hashtbl.create 0 in
let ftodo tag from =
try
ignore(Hashtbl.find hdone tag);
with
Not_found ->
try
match Hashtbl.find htodo tag with
| None -> Hashtbl.replace htodo tag from
| Some _ -> ()
with
Not_found ->
Hashtbl.add htodo tag from
in
let fdone tag edata =
try
ignore(Hashtbl.find hdone tag);
raise (Check_error (ElementDefinedTwice tag));
with
Not_found ->
Hashtbl.remove htodo tag;
Hashtbl.add hdone tag edata
in
let fattrib tag aname adata =
let h = (try
Hashtbl.find attribs tag
with
Not_found ->
let h = Hashtbl.create 1 in
Hashtbl.add attribs tag h;
h) in
try
ignore(Hashtbl.find h aname);
raise (Check_error (AttributeDefinedTwice (tag,aname)));
with
Not_found ->
Hashtbl.add h aname adata
in
let check_item = function
| DTDAttribute (tag,aname,atype,adef) ->
let utag = String.uppercase tag in
ftodo utag None;
fattrib utag (String.uppercase aname) (atype,adef)
| DTDElement (tag,etype) ->
let utag = String.uppercase tag in
fdone utag etype;
let check_type = function
| DTDEmpty -> ()
| DTDAny -> ()
| DTDChild x ->
let rec check_child = function
| DTDTag s -> ftodo (String.uppercase s) (Some utag)
| DTDPCData -> ()
| DTDOptional c
| DTDZeroOrMore c
| DTDOneOrMore c ->
check_child c
| DTDChoice []
| DTDChildren [] ->
raise (Check_error (ElementEmptyContructor tag))
| DTDChoice l
| DTDChildren l ->
List.iter check_child l
in
check_child x
in
check_type etype
in
List.iter check_item dtd;
Hashtbl.iter (fun t from ->
match from with
| None -> raise (Check_error (ElementNotDeclared t))
| Some tag -> raise (Check_error (ElementReferenced (t,tag)))
) htodo;
{
c_elements = hdone;
c_attribs = attribs;
}
let start_prove dtd root =
let d = {
elements = dtd.c_elements;
attribs = dtd.c_attribs;
state = Stack.create();
current = DTDChild (DTDTag root);
curtag = "_root";
} in
try
ignore(Hashtbl.find d.elements (String.uppercase root));
d
with
Not_found -> raise (Check_error (ElementNotDeclared root))
(* - for debug only - *)
let to_string_ref = ref (fun _ -> assert false)
let trace dtd tag =
let item = DTDElement ("current",dtd.current) in
printf "%s : %s\n"
(match tag with None -> "#PCDATA" | Some t -> t)
(!to_string_ref item)
exception TmpResult of dtd_result
let prove_child dtd tag =
trace dtd tag;
match dtd.current with
| DTDEmpty -> raise (Prove_error EmptyExpected)
| DTDAny -> ()
| DTDChild elt ->
let rec update = function
| DTDTag s ->
(match tag with
| None -> DTDNotMatched
| Some t when t = String.uppercase s -> DTDMatched
| Some _ -> DTDNotMatched)
| DTDPCData ->
(match tag with
| None -> DTDMatched
| Some _ -> DTDNotMatched)
| DTDOptional x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNext
| DTDMatched
| DTDMatchedResult _ -> DTDMatched)
| DTDZeroOrMore x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNext
| DTDMatched
| DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x))
| DTDOneOrMore x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNotMatched
| DTDMatched
| DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x))
| DTDChoice l ->
(try
(match List.exists (fun x ->
match update x with
| DTDMatched -> true
| DTDMatchedResult _ as r -> raise (TmpResult r)
| DTDNext | DTDNotMatched -> false) l with
| true -> DTDMatched
| false -> DTDNotMatched)
with
TmpResult r -> r)
| DTDChildren [] -> assert false (* DTD is checked ! *)
| DTDChildren (h :: t) ->
(match update h with
| DTDNext ->
(match t with
| [] -> DTDNotMatched
| _ -> update (DTDChildren t))
| DTDNotMatched -> DTDNotMatched
| DTDMatchedResult r ->
DTDMatchedResult (DTDChildren (r::t))
| DTDMatched ->
match t with
| [] -> DTDMatched
| _ -> DTDMatchedResult (DTDChildren t))
in
match update elt with
| DTDNext | DTDNotMatched ->
(match tag with
| None -> raise (Prove_error UnexpectedPCData)
| Some t -> raise (Prove_error (UnexpectedTag t)))
| DTDMatched ->
dtd.current <- DTDEmpty
| DTDMatchedResult r ->
dtd.current <- DTDChild r
let is_nmtoken_char = function
| 'A'..'Z' | 'a'..'z' | '0'..'9' | '.' | '-' | '_' | ':' -> true
| _ -> false
let prove_attrib dtd attr aname (atype,adef) accu =
let aval = (try Some (List.assoc aname attr) with Not_found -> None) in
(match atype, aval with
| DTDCData, _ -> ()
| DTDNMToken, None -> ()
| DTDNMToken, Some v ->
for i = 0 to String.length v - 1 do
if not (is_nmtoken_char v.[i]) then raise (Prove_error (InvalidAttributeValue aname));
done
| DTDEnum l, None -> ()
| DTDEnum l, Some v ->
if not (List.exists ((=) v) l) then raise (Prove_error (InvalidAttributeValue aname)));
match adef, aval with
| DTDRequired, None -> raise (Prove_error (RequiredAttribute aname))
| DTDFixed v, Some av when v <> av -> raise (Prove_error (InvalidAttributeValue aname))
| DTDImplied, None -> accu
| DTDFixed v , None
| DTDDefault _, Some v
| DTDDefault v, None
| DTDRequired, Some v
| DTDImplied, Some v
| DTDFixed _, Some v -> (aname,v) :: accu
let check_attrib ahash (aname,_) =
try
ignore(Hashtbl.find ahash aname);
with
Not_found -> raise (Prove_error (UnexpectedAttribute aname))
let rec do_prove dtd = function
| PCData s ->
prove_child dtd None;
PCData s
| Element (tag,attr,childs) ->
let utag = String.uppercase tag in
let uattr = List.map (fun (aname,aval) -> String.uppercase aname , aval) attr in
prove_child dtd (Some utag);
Stack.push (dtd.curtag,dtd.current) dtd.state;
let elt = (try Hashtbl.find dtd.elements utag with Not_found -> raise (Prove_error (UnexpectedTag tag))) in
let ahash = (try Hashtbl.find dtd.attribs utag with Not_found -> empty_hash) in
dtd.curtag <- tag;
dtd.current <- elt;
List.iter (check_attrib ahash) uattr;
let attr = Hashtbl.fold (prove_attrib dtd uattr) ahash [] in
let childs = ref (List.map (do_prove dtd) childs) in
(match dtd.current with
| DTDAny
| DTDEmpty -> ()
| DTDChild elt ->
let name = ref "" in
let rec check = function
| DTDTag t ->
name := t;
false
| DTDPCData when !childs = [] ->
childs := [PCData ""];
true
| DTDPCData ->
name := "#PCDATA";
false
| DTDOptional _ -> true
| DTDZeroOrMore _ -> true
| DTDOneOrMore e ->
ignore(check e);
false
| DTDChoice l -> List.exists check l
| DTDChildren l -> List.for_all check l
in
match check elt with
| true -> ()
| false -> raise (Prove_error (ChildExpected !name)));
let ctag, cur = Stack.pop dtd.state in
dtd.curtag <- tag;
dtd.current <- cur;
Element (tag,attr,!childs)
let prove dtd root xml =
do_prove (start_prove dtd root) xml
let parse_error_msg = function
| InvalidDTDDecl -> "Invalid DOCTYPE declaration"
| InvalidDTDElement -> "Invalid DTD element declaration"
| InvalidDTDAttribute -> "Invalid DTD attribute declaration"
| InvalidDTDTag -> "Invalid DTD tag"
| DTDItemExpected -> "DTD item expected"
let parse_error (msg,pos) =
let pos = (Obj.magic pos : error_pos) in
if pos.emin = pos.emax then
sprintf "%s line %d character %d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start)
else
sprintf "%s line %d characters %d-%d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
let check_error = function
| ElementDefinedTwice tag -> sprintf "Element '%s' defined twice" tag
| AttributeDefinedTwice (tag,aname) -> sprintf "Attribute '%s' of element '%s' defined twice" aname tag
| ElementEmptyContructor tag -> sprintf "Element '%s' has empty constructor" tag
| ElementReferenced (tag,from) -> sprintf "Element '%s' referenced by '%s' is not declared" tag from
| ElementNotDeclared tag -> sprintf "Element '%s' needed but is not declared" tag
let prove_error = function
| UnexpectedPCData -> "Unexpected PCData"
| UnexpectedTag tag -> sprintf "Unexpected tag : '%s'" tag
| UnexpectedAttribute att -> sprintf "Unexpected attribute : '%s'" att
| InvalidAttributeValue att -> sprintf "Invalid attribute value for '%s'" att
| RequiredAttribute att -> sprintf "Required attribute not found : '%s'" att
| ChildExpected cname -> sprintf "Child expected : '%s'" cname
| EmptyExpected -> "No more children expected"
let to_string = function
| DTDAttribute (tag,aname,atype,adef) ->
let atype_to_string = function
| DTDCData -> "CDATA"
| DTDNMToken -> "NMTOKEN"
| DTDEnum l -> sprintf "(%s)" (String.concat "|" l)
in
let adefault_to_string = function
| DTDDefault s -> sprintf "\"%s\"" s
| DTDRequired -> "#REQUIRED"
| DTDImplied -> "#IMPLIED"
| DTDFixed s -> sprintf "#FIXED \"%s\"" s
in
sprintf "" tag aname (atype_to_string atype) (adefault_to_string adef)
| DTDElement (tag,etype) ->
let rec echild_to_string = function
| DTDTag s -> s
| DTDPCData -> "#PCDATA"
| DTDOptional c -> sprintf "%s?" (echild_to_string c)
| DTDZeroOrMore c -> sprintf "%s*" (echild_to_string c)
| DTDOneOrMore c -> sprintf "%s+" (echild_to_string c)
| DTDChoice [c] -> echild_to_string c
| DTDChoice l -> sprintf "(%s)" (String.concat "|" (List.map echild_to_string l))
| DTDChildren [c] -> echild_to_string c
| DTDChildren l -> sprintf "(%s)" (String.concat "," (List.map echild_to_string l))
in
let etype_to_string = function
| DTDEmpty -> "EMPTY"
| DTDAny -> "ANY"
| DTDChild x ->
let rec op_to_string = function
| DTDOptional c -> sprintf "%s?" (op_to_string c)
| DTDZeroOrMore c -> sprintf "%s*" (op_to_string c)
| DTDOneOrMore c -> sprintf "%s+" (op_to_string c)
| _ -> ""
in
let rec root = function
| DTDOptional c
| DTDZeroOrMore c
| DTDOneOrMore c ->
root c
| DTDChoice [_]
| DTDChildren [_] as x ->
x, false
| DTDChoice _
| DTDChildren _ as x ->
x, true
| x -> x, false
in
match root x with
| r, true -> sprintf "%s%s" (echild_to_string r) (op_to_string x)
| r, false -> sprintf "(%s%s)" (echild_to_string r) (op_to_string x)
in
sprintf "" tag (etype_to_string etype)
;;
to_string_ref := to_string ocaml-xml-light-2.2/dtd.mli 0000644 0000000 0000000 00000012776 11676373027 0015660 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
(** Xml Light DTD
This module provide several functions to create, check, and use DTD
to prove Xml documents : {ul
{li using the DTD types, you can directly create your own DTD structure}
{li the {!Dtd.check} function can then be used to check that all DTD
states have been declared, that no attributes are declared twice,
and so on.}
{li the {!Dtd.prove} function can be used to check an {!Xml} data
structure with a checked DTD. The function will return the
expanded Xml document or raise an exception if the DTD proving
fails.}
}
{i Note about ENTITIES:}
While parsing Xml, PCDATA is always parsed and
the Xml entities & > < ' " are replaced by their
corresponding ASCII characters. For Xml attributes, theses can be
put between either double or simple quotes, and the backslash character
can be used to escape inner quotes. There is no support for CDATA Xml
nodes or PCDATA attributes declarations in DTD, and no support for
user-defined entities using the ENTITY DTD element.
*)
(** {6 The DTD Types} *)
type dtd_child =
| DTDTag of string
| DTDPCData
| DTDOptional of dtd_child
| DTDZeroOrMore of dtd_child
| DTDOneOrMore of dtd_child
| DTDChoice of dtd_child list
| DTDChildren of dtd_child list
type dtd_element_type =
| DTDEmpty
| DTDAny
| DTDChild of dtd_child
type dtd_attr_default =
| DTDDefault of string
| DTDRequired
| DTDImplied
| DTDFixed of string
type dtd_attr_type =
| DTDCData
| DTDNMToken
| DTDEnum of string list
type dtd_item =
| DTDAttribute of string * string * dtd_attr_type * dtd_attr_default
| DTDElement of string * dtd_element_type
type dtd = dtd_item list
type checked
(** {6 The DTD Functions} *)
(** Parse the named file into a Dtd data structure. Raise
{!Xml.File_not_found} if an error occured while opening the file.
Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_file : string -> dtd
(** Read the content of the in_channel and parse it into a Dtd data
structure. Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_in : in_channel -> dtd
(** Parse the string containing a Dtd document into a Dtd data
structure. Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_string : string -> dtd
(** Check the Dtd data structure declaration and return a checked
DTD. Raise {!Dtd.Check_error} if the DTD checking failed. *)
val check : dtd -> checked
(** Prove an Xml document using a checked DTD and an entry point.
The entry point is the first excepted tag of the Xml document,
the returned Xml document has the same structure has the original
one, excepted that non declared optional attributes have been set
to their default value specified in the DTD.
Raise {!Dtd.Check_error} [ElementNotDeclared] if the entry point
is not found, raise {!Dtd.Prove_error} if the Xml document failed
to be proved with the DTD. *)
val prove : checked -> string -> Xml.xml -> Xml.xml
(** Print a DTD element into a string. You can easily get a DTD
document from a DTD data structure using for example
[String.concat "\n" (List.map Dtd.to_string) my_dtd] *)
val to_string : dtd_item -> string
(** {6 The DTD Exceptions} *)
(** There is three types of DTD excecptions : {ul
{li {!Dtd.Parse_error} is raised when an error occured while
parsing a DTD document into a DTD data structure.}
{li {!Dtd.Check_error} is raised when an error occured while
checking a DTD data structure for completeness, or when the
prove entry point is not found when calling {!Dtd.prove}.}
{li {!Dtd.Prove_error} is raised when an error occured while
proving an Xml document.}
}
Several string conversion functions are provided to enable you
to report errors to the user.
*)
type parse_error_msg =
| InvalidDTDDecl
| InvalidDTDElement
| InvalidDTDAttribute
| InvalidDTDTag
| DTDItemExpected
type check_error =
| ElementDefinedTwice of string
| AttributeDefinedTwice of string * string
| ElementEmptyContructor of string
| ElementReferenced of string * string
| ElementNotDeclared of string
type prove_error =
| UnexpectedPCData
| UnexpectedTag of string
| UnexpectedAttribute of string
| InvalidAttributeValue of string
| RequiredAttribute of string
| ChildExpected of string
| EmptyExpected
type parse_error = parse_error_msg * Xml.error_pos
exception Parse_error of parse_error
exception Check_error of check_error
exception Prove_error of prove_error
val parse_error : parse_error -> string
val check_error : check_error -> string
val prove_error : prove_error -> string
(**/**)
(* internal usage only... *)
val _raises : (string -> exn) -> unit ocaml-xml-light-2.2/test.ml 0000644 0000000 0000000 00000003531 11676373027 0015700 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
open Xml
open Dtd
let parse data =
match data.[0] with
| '#' -> Xml.parse_file (String.sub data 1 ((String.length data)-2))
| _ -> Xml.parse_string data
;;
let buf = ref "" in
print_endline "Please enter some XML data followed (press return twice to parse) :";
try
while true do
match read_line() with
| "" when !buf <> "" ->
let data = !buf in
buf := "";
(try
let x = parse data in
print_endline "Parsing...";
print_endline (Xml.to_string_fmt x);
with
| Xml.Error msg as e ->
Printf.printf "Xml error : %s\n" (Xml.error msg)
| Dtd.Parse_error msg as e ->
Printf.printf "Dtd parse error : %s\n" (Dtd.parse_error msg)
| Dtd.Check_error msg as e ->
Printf.printf "Dtd check error : %s\n" (Dtd.check_error msg)
| Dtd.Prove_error msg as e ->
Printf.printf "Dtd prove error : %s\n" (Dtd.prove_error msg))
| s ->
buf := !buf ^ s ^ "\n"
done
with
End_of_file -> print_endline "Exit." ocaml-xml-light-2.2/xml.ml 0000644 0000000 0000000 00000016234 11676373027 0015525 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
open Printf
type xml =
| Element of (string * (string * string) list * xml list)
| PCData of string
type error_pos = {
eline : int;
eline_start : int;
emin : int;
emax : int;
}
type error_msg =
| UnterminatedComment
| UnterminatedString
| UnterminatedEntity
| IdentExpected
| CloseExpected
| NodeExpected
| AttributeNameExpected
| AttributeValueExpected
| EndOfTagExpected of string
| EOFExpected
type error = error_msg * error_pos
exception Error of error
exception File_not_found of string
exception Not_element of xml
exception Not_pcdata of xml
exception No_attribute of string
let default_parser = XmlParser.make()
let pos source =
let line, lstart, min, max = Xml_lexer.pos source in
{
eline = line;
eline_start = lstart;
emin = min;
emax = max;
}
let parse (p:XmlParser.t) (source:XmlParser.source) =
(* local cast Xml.xml -> xml *)
(Obj.magic XmlParser.parse p source : xml)
let parse_in ch = parse default_parser (XmlParser.SChannel ch)
let parse_string str = parse default_parser (XmlParser.SString str)
let parse_file f =
let p = XmlParser.make() in
let path = Filename.dirname f in
XmlParser.resolve p (fun file ->
let name = (match path with "." -> file | _ -> path ^ "/" ^ file) in
Dtd.check (Dtd.parse_file name)
);
parse p (XmlParser.SFile f)
let error_msg = function
| UnterminatedComment -> "Unterminated comment"
| UnterminatedString -> "Unterminated string"
| UnterminatedEntity -> "Unterminated entity"
| IdentExpected -> "Ident expected"
| CloseExpected -> "Element close expected"
| NodeExpected -> "Xml node expected"
| AttributeNameExpected -> "Attribute name expected"
| AttributeValueExpected -> "Attribute value expected"
| EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
| EOFExpected -> "End of file expected"
let error (msg,pos) =
if pos.emin = pos.emax then
sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
else
sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)
let line e = e.eline
let range e =
e.emin - e.eline_start , e.emax - e.eline_start
let abs_range e =
e.emin , e.emax
let tag = function
| Element (tag,_,_) -> tag
| x -> raise (Not_element x)
let pcdata = function
| PCData text -> text
| x -> raise (Not_pcdata x)
let attribs = function
| Element (_,attr,_) -> attr
| x -> raise (Not_element x)
let attrib x att =
match x with
| Element (_,attr,_) ->
(try
let att = String.lowercase att in
snd (List.find (fun (n,_) -> String.lowercase n = att) attr)
with
Not_found ->
raise (No_attribute att))
| x ->
raise (Not_element x)
let children = function
| Element (_,_,clist) -> clist
| x -> raise (Not_element x)
(*let enum = function
| Element (_,_,clist) -> List.to_enum clist
| x -> raise (Not_element x)
*)
let iter f = function
| Element (_,_,clist) -> List.iter f clist
| x -> raise (Not_element x)
let map f = function
| Element (_,_,clist) -> List.map f clist
| x -> raise (Not_element x)
let fold f v = function
| Element (_,_,clist) -> List.fold_left f v clist
| x -> raise (Not_element x)
let tmp = Buffer.create 200
let buffer_pcdata text =
let l = String.length text in
for p = 0 to l-1 do
match text.[p] with
| '>' -> Buffer.add_string tmp ">"
| '<' -> Buffer.add_string tmp "<"
| '&' ->
if p < l-1 && text.[p+1] = '#' then
Buffer.add_char tmp '&'
else
Buffer.add_string tmp "&"
| '\'' -> Buffer.add_string tmp "'"
| '"' -> Buffer.add_string tmp """
| c -> Buffer.add_char tmp c
done
let buffer_attr (n,v) =
Buffer.add_char tmp ' ';
Buffer.add_string tmp n;
Buffer.add_string tmp "=\"";
let l = String.length v in
for p = 0 to l-1 do
match v.[p] with
| '\\' -> Buffer.add_string tmp "\\\\"
| '"' -> Buffer.add_string tmp "\\\""
| c -> Buffer.add_char tmp c
done;
Buffer.add_char tmp '"'
let to_string x =
let pcdata = ref false in
let rec loop = function
| Element (tag,alist,[]) ->
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp "/>";
pcdata := false;
| Element (tag,alist,l) ->
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_char tmp '>';
pcdata := false;
List.iter loop l;
Buffer.add_string tmp "";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
pcdata := false;
| PCData text ->
if !pcdata then Buffer.add_char tmp ' ';
buffer_pcdata text;
pcdata := true;
in
Buffer.reset tmp;
loop x;
let s = Buffer.contents tmp in
Buffer.reset tmp;
s
let to_string_fmt x =
let rec loop ?(newl=false) tab = function
| Element (tag,alist,[]) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp "/>";
if newl then Buffer.add_char tmp '\n';
| Element (tag,alist,[PCData text]) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp ">";
buffer_pcdata text;
Buffer.add_string tmp "";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
if newl then Buffer.add_char tmp '\n';
| Element (tag,alist,l) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp ">\n";
List.iter (loop ~newl:true (tab^" ")) l;
Buffer.add_string tmp tab;
Buffer.add_string tmp "";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
if newl then Buffer.add_char tmp '\n';
| PCData text ->
buffer_pcdata text;
if newl then Buffer.add_char tmp '\n';
in
Buffer.reset tmp;
loop "" x;
let s = Buffer.contents tmp in
Buffer.reset tmp;
s
;;
XmlParser._raises (fun x p ->
(* local cast : Xml.error_msg -> error_msg *)
Error ((Obj.magic x : error_msg),pos p))
(fun f -> File_not_found f)
(fun x p -> Dtd.Parse_error (x,
(* local cast : Xml.error_pos -> error_pos *)
(Obj.magic (pos p))));
Dtd._raises (fun f -> File_not_found f); ocaml-xml-light-2.2/xml.mli 0000644 0000000 0000000 00000013404 11676373027 0015672 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
(** Xml Light
Xml Light is a minimal Xml parser & printer for OCaml.
It provide few functions to parse a basic Xml document into
an OCaml data structure and to print back the data structures
to an Xml document.
Xml Light has also support for {b DTD} (Document Type Definition).
{i (c)Copyright 2002-2003 Nicolas Cannasse}
*)
(** {6 Xml Data Structure} *)
(** An Xml node is either
[Element (tag-name, attributes, children)] or [PCData text] *)
type xml =
| Element of (string * (string * string) list * xml list)
| PCData of string
(** {6 Xml Parsing} *)
(** For easily parsing an Xml data source into an xml data structure,
you can use theses functions. But if you want advanced parsing usage,
please look at the {!XmlParser} module.
All the parsing functions can raise some exceptions, see the
{{:#exc}Exceptions} section for more informations. *)
(** Parse the named file into an Xml data structure. *)
val parse_file : string -> xml
(** Read the content of the in_channel and parse it into an Xml data
structure. *)
val parse_in : in_channel -> xml
(** Parse the string containing an Xml document into an Xml data
structure. *)
val parse_string : string -> xml
(** {6:exc Xml Exceptions} *)
(** Several exceptions can be raised when parsing an Xml document : {ul
{li {!Xml.Error} is raised when an xml parsing error occurs. the
{!Xml.error_msg} tells you which error occured during parsing
and the {!Xml.error_pos} can be used to retreive the document
location where the error occured at.}
{li {!Xml.File_not_found} is raised when and error occured while
opening a file with the {!Xml.parse_file} function or when a
DTD file declared by the Xml document is not found {i (see the
{!XmlParser} module for more informations on how to handle the
DTD file loading)}.}
}
If the Xml document is containing a DTD, then some other exceptions
can be raised, see the module {!Dtd} for more informations.
*)
type error_pos
type error_msg =
| UnterminatedComment
| UnterminatedString
| UnterminatedEntity
| IdentExpected
| CloseExpected
| NodeExpected
| AttributeNameExpected
| AttributeValueExpected
| EndOfTagExpected of string
| EOFExpected
type error = error_msg * error_pos
exception Error of error
exception File_not_found of string
(** Get a full error message from an Xml error. *)
val error : error -> string
(** Get the Xml error message as a string. *)
val error_msg : error_msg -> string
(** Get the line the error occured at. *)
val line : error_pos -> int
(** Get the relative character range (in current line) the error occured at.*)
val range : error_pos -> int * int
(** Get the absolute character range the error occured at. *)
val abs_range : error_pos -> int * int
(** {6 Xml Functions} *)
exception Not_element of xml
exception Not_pcdata of xml
exception No_attribute of string
(** [tag xdata] returns the tag value of the xml node.
Raise {!Xml.Not_element} if the xml is not an element *)
val tag : xml -> string
(** [pcdata xdata] returns the PCData value of the xml node.
Raise {!Xml.Not_pcdata} if the xml is not a PCData *)
val pcdata : xml -> string
(** [attribs xdata] returns the attribute list of the xml node.
First string if the attribute name, second string is attribute value.
Raise {!Xml.Not_element} if the xml is not an element *)
val attribs : xml -> (string * string) list
(** [attrib xdata "href"] returns the value of the ["href"]
attribute of the xml node (attribute matching is case-insensitive).
Raise {!Xml.No_attribute} if the attribute does not exists in the node's
attribute list
Raise {!Xml.Not_element} if the xml is not an element *)
val attrib : xml -> string -> string
(** [children xdata] returns the children list of the xml node
Raise {!Xml.Not_element} if the xml is not an element *)
val children : xml -> xml list
(*** [enum xdata] returns the children enumeration of the xml node
Raise {!Xml.Not_element} if the xml is not an element *)
(* val enum : xml -> xml Enum.t *)
(** [iter f xdata] calls f on all children of the xml node.
Raise {!Xml.Not_element} if the xml is not an element *)
val iter : (xml -> unit) -> xml -> unit
(** [map f xdata] is equivalent to [List.map f (Xml.children xdata)]
Raise {!Xml.Not_element} if the xml is not an element *)
val map : (xml -> 'a) -> xml -> 'a list
(** [fold f init xdata] is equivalent to
[List.fold_left f init (Xml.children xdata)]
Raise {!Xml.Not_element} if the xml is not an element *)
val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a
(** {6 Xml Printing} *)
(** Print the xml data structure into a compact xml string (without
any user-readable formating ). *)
val to_string : xml -> string
(** Print the xml data structure into an user-readable string with
tabs and lines break between different nodes. *)
val to_string_fmt : xml -> string
ocaml-xml-light-2.2/xmlParser.ml 0000644 0000000 0000000 00000012143 11676373027 0016675 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
* Copyright (C) 2003 Jacques Garrigue
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
open Printf
open Dtd
open Xml
type t = {
mutable prove : bool;
mutable check_eof : bool;
mutable concat_pcdata : bool;
mutable resolve : (string -> checked);
}
type source =
| SFile of string
| SChannel of in_channel
| SString of string
| SLexbuf of Lexing.lexbuf
type state = {
source : Lexing.lexbuf;
stack : Xml_lexer.token Stack.t;
xparser : t;
}
exception Internal_error of Xml.error_msg
exception NoMoreData
let xml_error = ref (fun _ -> assert false)
let dtd_error = ref (fun _ -> assert false)
let file_not_found = ref (fun _ -> assert false)
let _raises e f d =
xml_error := e;
file_not_found := f;
dtd_error := d
let make () =
{
prove = true;
check_eof = true;
concat_pcdata = true;
resolve = (fun file -> raise (!file_not_found file))
}
let prove p v = p.prove <- v
let resolve p f = p.resolve <- f
let check_eof p v = p.check_eof <- v
let concat_pcdata p v = p.concat_pcdata <- v
let pop s =
try
Stack.pop s.stack
with
Stack.Empty ->
Xml_lexer.token s.source
let push t s =
Stack.push t s.stack
let rec read_node s =
match pop s with
| Xml_lexer.PCData s -> PCData s
| Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
| Xml_lexer.Tag (tag, attr, false) -> Element (tag, attr, read_elems ~tag s)
| t ->
push t s;
raise NoMoreData
and
read_elems ?tag s =
let elems = ref [] in
(try
while true do
match s.xparser.concat_pcdata , read_node s , !elems with
| true , PCData c , (PCData c2) :: q ->
elems := PCData (sprintf "%s\n%s" c2 c) :: q
| _ , x , l ->
elems := x :: l
done
with
NoMoreData -> ());
match pop s with
| Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
| Xml_lexer.Eof when tag = None -> List.rev !elems
| t ->
match tag with
| None -> raise (Internal_error EOFExpected)
| Some s -> raise (Internal_error (EndOfTagExpected s))
let read_xml s =
match s.xparser.prove, pop s with
| true, Xml_lexer.DocType (root, Xml_lexer.DTDFile file) ->
let pos = Xml_lexer.pos s.source in
let dtd = s.xparser.resolve file in
Xml_lexer.restore pos;
let x = read_node s in
Dtd.prove dtd root x
| true, Xml_lexer.DocType (root, Xml_lexer.DTDData dtd) ->
let dtd = Dtd.check dtd in
let x = read_node s in
Dtd.prove dtd root x
| false, Xml_lexer.DocType _ ->
read_node s
| _, t ->
push t s;
read_node s
let convert = function
| Xml_lexer.EUnterminatedComment -> UnterminatedComment
| Xml_lexer.EUnterminatedString -> UnterminatedString
| Xml_lexer.EIdentExpected -> IdentExpected
| Xml_lexer.ECloseExpected -> CloseExpected
| Xml_lexer.ENodeExpected -> NodeExpected
| Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
| Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
| Xml_lexer.EUnterminatedEntity -> UnterminatedEntity
let dtd_convert = function
| Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl
| Xml_lexer.EInvalidDTDTag -> InvalidDTDTag
| Xml_lexer.EDTDItemExpected -> DTDItemExpected
| Xml_lexer.EInvalidDTDElement -> InvalidDTDElement
| Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute
let do_parse xparser source =
try
Xml_lexer.init source;
let s = { source = source; xparser = xparser; stack = Stack.create(); } in
let x = read_xml s in
if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
Xml_lexer.close source;
x
with
| NoMoreData ->
Xml_lexer.close source;
raise (!xml_error NodeExpected source)
| Internal_error e ->
Xml_lexer.close source;
raise (!xml_error e source)
| Xml_lexer.Error e ->
Xml_lexer.close source;
raise (!xml_error (convert e) source)
| Xml_lexer.DTDError e ->
Xml_lexer.close source;
raise (!dtd_error (dtd_convert e) source)
let parse p = function
| SChannel ch -> do_parse p (Lexing.from_channel ch)
| SString str -> do_parse p (Lexing.from_string str)
| SLexbuf lex -> do_parse p lex
| SFile fname ->
let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
try
let x = do_parse p (Lexing.from_channel ch) in
close_in ch;
x
with
e ->
close_in ch;
raise e
ocaml-xml-light-2.2/xmlParser.mli 0000644 0000000 0000000 00000006667 11676373027 0017064 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
(** Xml Light Parser
While basic parsing functions can be used in the {!Xml} module, this module
is providing a way to create, configure and run an Xml parser.
*)
(** Abstract type for an Xml parser. *)
type t
(** Several kind of resources can contain Xml documents. *)
type source =
| SFile of string
| SChannel of in_channel
| SString of string
| SLexbuf of Lexing.lexbuf
(** This function returns a new parser with default options. *)
val make : unit -> t
(** This function enable or disable automatic DTD proving with the parser.
Note that Xml documents having no reference to a DTD are never proved
when parsed (but you can prove them later using the {!Dtd} module
{i (by default, prove is true)}. *)
val prove : t -> bool -> unit
(** When parsing an Xml document from a file using the {!Xml.parse_file}
function, the DTD file if declared by the Xml document has to be in the
same directory as the xml file. When using other parsing functions,
such as on a string or on a channel, the parser will raise everytime
{!Xml.File_not_found} if a DTD file is needed and prove enabled. To enable
the DTD loading of the file, the user have to configure the Xml parser
with a [resolve] function which is taking as argument the DTD filename and
is returning a checked DTD. The user can then implement any kind of DTD
loading strategy, and can use the {!Dtd} module functions to parse and check
the DTD file {i (by default, the resolve function is raising}
{!Xml.File_not_found}). *)
val resolve : t -> (string -> Dtd.checked) -> unit
(** When a Xml document is parsed, the parser will check that the end of the
document is reached, so for example parsing [""] will fail instead
of returning only the A element. You can turn off this check by setting
[check_eof] to [false] {i (by default, check_eof is true)}. *)
val check_eof : t -> bool -> unit
(** Once the parser is configurated, you can run the parser on a any kind
of xml document source to parse its contents into an Xml data structure. *)
val parse : t -> source -> Xml.xml
(** When several PCData elements are separed by a \n (or \r\n), you can
either split the PCData in two distincts PCData or merge them with \n
as seperator into one PCData. The default behavior is to concat the
PCData, but this can be changed for a given parser with this flag. *)
val concat_pcdata : t -> bool -> unit
(**/**)
(* internal usage only... *)
val _raises : (Xml.error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> (Dtd.parse_error_msg -> Lexing.lexbuf -> exn) -> unit ocaml-xml-light-2.2/xml_lexer.mli 0000644 0000000 0000000 00000003304 11676373027 0017067 0 ustar 00root root 0000000 0000000 (*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
type error =
| EUnterminatedComment
| EUnterminatedString
| EIdentExpected
| ECloseExpected
| ENodeExpected
| EAttributeNameExpected
| EAttributeValueExpected
| EUnterminatedEntity
type dtd_error =
| EInvalidDTDDecl
| EInvalidDTDTag
| EDTDItemExpected
| EInvalidDTDElement
| EInvalidDTDAttribute
exception Error of error
exception DTDError of dtd_error
type dtd_decl =
| DTDFile of string
| DTDData of Dtd.dtd
type token =
| Tag of string * (string * string) list * bool
| PCData of string
| Endtag of string
| DocType of (string * dtd_decl)
| Eof
type pos = int * int * int * int
val init : Lexing.lexbuf -> unit
val close : Lexing.lexbuf -> unit
val token : Lexing.lexbuf -> token
val dtd : Lexing.lexbuf -> Dtd.dtd
val pos : Lexing.lexbuf -> pos
val restore : pos -> unit ocaml-xml-light-2.2/xml_lexer.mll 0000644 0000000 0000000 00000030065 11676373027 0017076 0 ustar 00root root 0000000 0000000 {(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
open Lexing
open Xml_parser
open Dtd
type error =
| EUnterminatedComment
| EUnterminatedString
| EIdentExpected
| ECloseExpected
| ENodeExpected
| EAttributeNameExpected
| EAttributeValueExpected
| EUnterminatedEntity
type dtd_error =
| EInvalidDTDDecl
| EInvalidDTDTag
| EDTDItemExpected
| EInvalidDTDElement
| EInvalidDTDAttribute
exception Error of error
exception DTDError of dtd_error
type pos = int * int * int * int
type dtd_decl =
| DTDFile of string
| DTDData of dtd
type dtd_item_type =
| TElement
| TAttribute
type token =
| Tag of string * (string * string) list * bool
| PCData of string
| Endtag of string
| DocType of (string * dtd_decl)
| Eof
let last_pos = ref 0
and current_line = ref 0
and current_line_start = ref 0
let tmp = Buffer.create 200
let idents = Hashtbl.create 0
let _ = begin
Hashtbl.add idents "gt;" ">";
Hashtbl.add idents "lt;" "<";
Hashtbl.add idents "amp;" "&";
Hashtbl.add idents "apos;" "'";
Hashtbl.add idents "quot;" "\"";
end
let init lexbuf =
current_line := 1;
current_line_start := lexeme_start lexbuf;
last_pos := !current_line_start
let close lexbuf =
Buffer.reset tmp
let pos lexbuf =
!current_line , !current_line_start ,
!last_pos ,
lexeme_start lexbuf
let restore (cl,cls,lp,_) =
current_line := cl;
current_line_start := cls;
last_pos := lp
let newline lexbuf =
incr current_line;
last_pos := lexeme_end lexbuf;
current_line_start := !last_pos
let error lexbuf e =
last_pos := lexeme_start lexbuf;
raise (Error e)
let dtd_error lexbuf e =
last_pos := lexeme_start lexbuf;
raise (DTDError e)
}
let newline = ['\n']
let break = ['\r']
let space = [' ' '\t']
let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-']
let entitychar = ['A'-'Z' 'a'-'z']
let pcchar = [^ '\r' '\n' '<' '>' '&']
rule token = parse
| newline
{
newline lexbuf;
token lexbuf
}
| (space | break) +
{
last_pos := lexeme_end lexbuf;
token lexbuf
}
| ""
{ () }
| eof
{ raise (Error EUnterminatedComment) }
| _
{ comment lexbuf }
and header = parse
| newline
{
newline lexbuf;
header lexbuf
}
| "?>"
{ () }
| eof
{ error lexbuf ECloseExpected }
| _
{ header lexbuf }
and pcdata = parse
| pcchar+
{
Buffer.add_string tmp (lexeme lexbuf);
pcdata lexbuf
}
| ""
{
Buffer.add_string tmp (lexeme lexbuf);
pcdata lexbuf;
}
| '&'
{
Buffer.add_string tmp (entity lexbuf);
pcdata lexbuf
}
| ""
{ Buffer.contents tmp }
and entity = parse
| entitychar+ ';'
{
let ident = lexeme lexbuf in
try
Hashtbl.find idents (String.lowercase ident)
with
Not_found -> "&" ^ ident
}
| _ | eof
{ raise (Error EUnterminatedEntity) }
and ident_name = parse
| identchar+
{ lexeme lexbuf }
| _ | eof
{ error lexbuf EIdentExpected }
and close_tag = parse
| '>'
{ () }
| _ | eof
{ error lexbuf ECloseExpected }
and attributes = parse
| '>'
{ [], false }
| "/>"
{ [], true }
| "" (* do not read a char ! *)
{
let key = attribute lexbuf in
let data = attribute_data lexbuf in
ignore_spaces lexbuf;
let others, closed = attributes lexbuf in
(key, data) :: others, closed
}
and attribute = parse
| identchar+
{ lexeme lexbuf }
| _ | eof
{ error lexbuf EAttributeNameExpected }
and attribute_data = parse
| space* '=' space* '"'
{
Buffer.reset tmp;
last_pos := lexeme_end lexbuf;
dq_string lexbuf
}
| space* '=' space* '\''
{
Buffer.reset tmp;
last_pos := lexeme_end lexbuf;
q_string lexbuf
}
| _ | eof
{ error lexbuf EAttributeValueExpected }
and dq_string = parse
| '"'
{ Buffer.contents tmp }
| '\\' [ '"' '\\' ]
{
Buffer.add_char tmp (lexeme_char lexbuf 1);
dq_string lexbuf
}
| eof
{ raise (Error EUnterminatedString) }
| _
{
Buffer.add_char tmp (lexeme_char lexbuf 0);
dq_string lexbuf
}
and q_string = parse
| '\''
{ Buffer.contents tmp }
| '\\' [ '\'' '\\' ]
{
Buffer.add_char tmp (lexeme_char lexbuf 1);
q_string lexbuf
}
| eof
{ raise (Error EUnterminatedString) }
| _
{
Buffer.add_char tmp (lexeme_char lexbuf 0);
q_string lexbuf
}
and dtd_data = parse
| "PUBLIC"
{
ignore_spaces lexbuf;
(* skipping Public ID *)
let _ = dtd_file lexbuf in
let file = dtd_file lexbuf in
dtd_end_decl lexbuf;
DTDFile file
}
| "SYSTEM"
{
ignore_spaces lexbuf;
let file = dtd_file lexbuf in
dtd_end_decl lexbuf;
DTDFile file
}
| '['
{
ignore_spaces lexbuf;
let data = dtd_intern lexbuf in
dtd_end_decl lexbuf;
DTDData data
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }
and dtd_file = parse
| '"'
{
Buffer.reset tmp;
let s = dq_string lexbuf in
ignore_spaces lexbuf;
s
}
| '\''
{
Buffer.reset tmp;
let s = q_string lexbuf in
ignore_spaces lexbuf;
s
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }
and dtd_intern = parse
| ']'
{
ignore_spaces lexbuf;
[]
}
| ""
{
let l = dtd_item lexbuf in
l @ (dtd_intern lexbuf)
}
and dtd = parse
| eof
{ [] }
| newline
{
newline lexbuf;
dtd lexbuf
}
| (space | break)+
{ dtd lexbuf }
| ""
{
let l = dtd_item lexbuf in
l @ (dtd lexbuf)
}
and dtd_end_decl = parse
| '>'
{ ignore_spaces lexbuf }
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }
and dtd_item = parse
| "