Sisyphus repositório
Última atualização: 1 outubro 2023 | SRPMs: 18631 | Visitas: 37404252
en ru br
ALT Linux repositórios
S:2.5-alt1

Group :: Desenvolvimento/ML
RPM: ocaml-xml-light

 Main   Changelog   Spec   Patches   Sources   Download   Gear   Bugs e FR  Repocop 

ocaml-xml-light-2.2/000075500000000000000000000000001167637302700143655ustar00rootroot00000000000000ocaml-xml-light-2.2/Makefile000064400000000000000000000054441167637302700160340ustar00rootroot00000000000000# Makefile generated by OCamake 
# http://tech.motion-twin.com
.SUFFIXES : .ml .mli .cmo .cmx .cmi .mll .mly

INSTALLDIR=`ocamlc -where`
CFLAGS=
LFLAGS= -a
LIBS=

all: xml-light.cma test.exe doc

opt: xml-light.cmxa test_opt.exe

install: all opt
cp xml-light.cmxa xml-light.a xml-light.cma xml.mli xmlParser.mli dtd.mli xml.cmi xmlParser.cmi dtd.cmi xml.cmx dtd.cmx xmlParser.cmx $(INSTALLDIR)

doc:
mkdir doc
ocamldoc -sort -html -d doc xml.mli dtd.mli xmlParser.mli

test.exe: xml-light.cma
ocamlc xml-light.cma test.ml -o test.exe

test_opt.exe: xml-light.cmxa
ocamlopt xml-light.cmxa test.ml -o test_opt.exe

xml-light.cma: xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo
ocamlc -o xml-light.cma $(LFLAGS) $(LIBS) xml_parser.cmo xml_lexer.cmo dtd.cmo xmlParser.cmo xml.cmo

xml-light.cmxa: xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx
ocamlopt -o xml-light.cmxa $(LFLAGS) $(LIBS) xml_parser.cmx xml_lexer.cmx dtd.cmx xmlParser.cmx xml.cmx

dtd.cmo: xml.cmi xml_lexer.cmi dtd.cmi

dtd.cmx: xml.cmi xml_lexer.cmi dtd.cmi

xml.cmo: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi

xml.cmx: dtd.cmi xmlParser.cmi xml_lexer.cmi xml.cmi

xmlParser.cmo: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi

xmlParser.cmx: dtd.cmi xml.cmi xml_lexer.cmi xmlParser.cmi

dtd.cmi: xml.cmi

xml.cmi:

xmlParser.cmi: dtd.cmi xml.cmi

xml_lexer.cmi: dtd.cmi

xml_parser.cmo: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi

xml_parser.cmx: xml_parser.ml dtd.cmi xml_parser.mli xml_parser.cmi

xml_lexer.cmo: xml_lexer.ml xml_lexer.cmi

xml_lexer.cmx: xml_lexer.ml xml_lexer.cmi

clean:
rm -f xml-light.cma test.exe dtd.cmo dtd.cmi test.cmo test.cmi xml.cmo xml.cmi xmlParser.cmo xmlParser.cmi dtd.cmi xml.cmi xmlParser.cmi xml_lexer.cmi xml_lexer.cmo xml_lexer.ml xml_parser.mli xml_parser.cmi xml_parser.ml xml_parser.cmo
rm -f xml_light.lib xml_light.a xml-light.cmxa test_opt.exe dtd.cmx dtd.obj dtd.o test.cmx test.obj test.o xml.cmx xml.obj xml.o xmlParser.cmx xmlParser.obj xmlParser.o xml_lexer.cmx xml_lexer.obj xml_lexer.o xml_parser.cmx xml_parser.obj xml_parser.o

wclean:
-@del xml-light.cma test.exe dtd.cmo dtd.cmi test.cmo test.cmi xml.cmo xml.cmi xmlParser.cmo xmlParser.cmi dtd.cmi xml.cmi xmlParser.cmi xml_lexer.cmi xml_lexer.cmo xml_lexer.ml xml_parser.mli xml_parser.cmi xml_parser.ml xml_parser.cmo 2>NUL
-@del xml_light.lib xml_light.a xml-light.cmxa test_opt.exe dtd.cmx dtd.obj dtd.o test.cmx test.obj test.o xml.cmx xml.obj xml.o xmlParser.cmx xmlParser.obj xmlParser.o xml_lexer.cmx xml_lexer.obj xml_lexer.o xml_parser.cmx xml_parser.obj xml_parser.o 2>NUL

# SUFFIXES
.ml.cmo:
ocamlc $(CFLAGS) -c $<

.ml.cmx:
ocamlopt $(CFLAGS) -c $<

.mli.cmi:
ocamlc $(CFLAGS) $<

.mll.ml:
ocamllex $<

.mly.ml:
ocamlyacc $<

ocaml-xml-light-2.2/README000064400000000000000000000027601167637302700152520ustar00rootroot00000000000000Xml-Light Version 2.2 :
-----------------------

Last version : http://tech.motion-twin.com

Xml Light is a minimal Xml parser & printer for OCaml.
It provide few functions to parse a basic Xml document into
an OCaml data structure and to print back the data structures
to an Xml document.

Xml Light has also support for DTD (Document Type Definition).


Install
-------

make install

by default, Xml Light is installed in the 'ocamlc -where' directory.
you can change it by editing the Makefile.

for Windows users, if you're using the MSVC version of ocaml and
don't have cygwin tools installed, you can do : nmake all
and then copy manually the files to the place you want.

Usage
-----

simple samples :


-- parse / print an xml string ---

let x = Xml.parse_string "<a href='url'>TEXT<begin/><end/></a>" in
Printf.printf "XML formated = \n%s" (Xml.to_string_fmt x);

-- load an xml and a dtd , prove and print ---

let x = Xml.parse_file "myfile.xml" in
let dtd = Dtd.parse_file "myfile.dtd" in
let x = Dtd.prove (Dtd.check dtd) "start" x in
print_endline (Xml.to_string x)


Documentation
-------------

HTML documentation can be generated with ocamldoc :

make doc

you can also directly browse the MLI files to read it.

Licence
-------

Xml Light is LGPL

Credits
-------
(c)2003-2005 Nicolas Cannasse (ncannasse@motion-twin.com)
(c)2003-2005 Motion-Twin
Some parts of this code source has an additionnal copyright to Jacques Garrigue
ocaml-xml-light-2.2/dtd.ml000064400000000000000000000352541167637302700155030ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

open Xml
open Printf

type parse_error_msg =
| InvalidDTDDecl
| InvalidDTDElement
| InvalidDTDAttribute
| InvalidDTDTag
| DTDItemExpected

type check_error =
| ElementDefinedTwice of string
| AttributeDefinedTwice of string * string
| ElementEmptyContructor of string
| ElementReferenced of string * string
| ElementNotDeclared of string

type prove_error =
| UnexpectedPCData
| UnexpectedTag of string
| UnexpectedAttribute of string
| InvalidAttributeValue of string
| RequiredAttribute of string
| ChildExpected of string
| EmptyExpected

type dtd_child =
| DTDTag of string
| DTDPCData
| DTDOptional of dtd_child
| DTDZeroOrMore of dtd_child
| DTDOneOrMore of dtd_child
| DTDChoice of dtd_child list
| DTDChildren of dtd_child list

type dtd_element_type =
| DTDEmpty
| DTDAny
| DTDChild of dtd_child

type dtd_attr_default =
| DTDDefault of string
| DTDRequired
| DTDImplied
| DTDFixed of string

type dtd_attr_type =
| DTDCData
| DTDNMToken
| DTDEnum of string list

type dtd_item =
| DTDAttribute of string * string * dtd_attr_type * dtd_attr_default
| DTDElement of string * dtd_element_type

type dtd_result =
| DTDNext
| DTDNotMatched
| DTDMatched
| DTDMatchedResult of dtd_child

type error_pos = {
eline : int;
eline_start : int;
emin : int;
emax : int;
}

type parse_error = parse_error_msg * Xml.error_pos

exception Parse_error of parse_error
exception Check_error of check_error
exception Prove_error of prove_error

type dtd = dtd_item list

type ('a,'b) hash = ('a,'b) Hashtbl.t

type checked = {
c_elements : (string,dtd_element_type) hash;
c_attribs : (string,(string,(dtd_attr_type * dtd_attr_default)) hash) hash;
}

type dtd_state = {
elements : (string,dtd_element_type) hash;
attribs : (string,(string,(dtd_attr_type * dtd_attr_default)) hash) hash;
mutable current : dtd_element_type;
mutable curtag : string;
state : (string * dtd_element_type) Stack.t;
}

let file_not_found = ref (fun _ -> assert false)

let _raises e =
file_not_found := e

let empty_hash = Hashtbl.create 0

let pos source =
let line, lstart, min, max = Xml_lexer.pos source in
(Obj.magic {
eline = line;
eline_start = lstart;
emin = min;
emax = max;
} : Xml.error_pos)

let convert = function
| Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl
| Xml_lexer.EInvalidDTDElement -> InvalidDTDElement
| Xml_lexer.EInvalidDTDTag -> InvalidDTDTag
| Xml_lexer.EDTDItemExpected -> DTDItemExpected
| Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute

let parse source =
try
Xml_lexer.init source;
(* local cast Dtd.dtd -> dtd *)
let dtd = (Obj.magic Xml_lexer.dtd source : dtd) in
Xml_lexer.close source;
dtd
with
| Xml_lexer.DTDError e ->
Xml_lexer.close source;
raise (Parse_error (convert e,pos source))

let parse_string s = parse (Lexing.from_string s)
let parse_in ch = parse (Lexing.from_channel ch)

let parse_file fname =
let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
try
let x = parse (Lexing.from_channel ch) in
close_in ch;
x
with
e ->
close_in ch;
raise e

let check dtd =
let attribs = Hashtbl.create 0 in
let hdone = Hashtbl.create 0 in
let htodo = Hashtbl.create 0 in
let ftodo tag from =
try
ignore(Hashtbl.find hdone tag);
with
Not_found ->
try
match Hashtbl.find htodo tag with
| None -> Hashtbl.replace htodo tag from
| Some _ -> ()
with
Not_found ->
Hashtbl.add htodo tag from
in
let fdone tag edata =
try
ignore(Hashtbl.find hdone tag);
raise (Check_error (ElementDefinedTwice tag));
with
Not_found ->
Hashtbl.remove htodo tag;
Hashtbl.add hdone tag edata
in
let fattrib tag aname adata =
let h = (try
Hashtbl.find attribs tag
with
Not_found ->
let h = Hashtbl.create 1 in
Hashtbl.add attribs tag h;
h) in
try
ignore(Hashtbl.find h aname);
raise (Check_error (AttributeDefinedTwice (tag,aname)));
with
Not_found ->
Hashtbl.add h aname adata
in
let check_item = function
| DTDAttribute (tag,aname,atype,adef) ->
let utag = String.uppercase tag in
ftodo utag None;
fattrib utag (String.uppercase aname) (atype,adef)
| DTDElement (tag,etype) ->
let utag = String.uppercase tag in
fdone utag etype;
let check_type = function
| DTDEmpty -> ()
| DTDAny -> ()
| DTDChild x ->
let rec check_child = function
| DTDTag s -> ftodo (String.uppercase s) (Some utag)
| DTDPCData -> ()
| DTDOptional c
| DTDZeroOrMore c
| DTDOneOrMore c ->
check_child c
| DTDChoice []
| DTDChildren [] ->
raise (Check_error (ElementEmptyContructor tag))
| DTDChoice l
| DTDChildren l ->
List.iter check_child l
in
check_child x
in
check_type etype
in
List.iter check_item dtd;
Hashtbl.iter (fun t from ->
match from with
| None -> raise (Check_error (ElementNotDeclared t))
| Some tag -> raise (Check_error (ElementReferenced (t,tag)))
) htodo;
{
c_elements = hdone;
c_attribs = attribs;
}

let start_prove dtd root =
let d = {
elements = dtd.c_elements;
attribs = dtd.c_attribs;
state = Stack.create();
current = DTDChild (DTDTag root);
curtag = "_root";
} in
try
ignore(Hashtbl.find d.elements (String.uppercase root));
d
with
Not_found -> raise (Check_error (ElementNotDeclared root))


(* - for debug only - *)

let to_string_ref = ref (fun _ -> assert false)

let trace dtd tag =
let item = DTDElement ("current",dtd.current) in
printf "%s : %s\n"
(match tag with None -> "#PCDATA" | Some t -> t)
(!to_string_ref item)

exception TmpResult of dtd_result

let prove_child dtd tag =
trace dtd tag;
match dtd.current with
| DTDEmpty -> raise (Prove_error EmptyExpected)
| DTDAny -> ()
| DTDChild elt ->
let rec update = function
| DTDTag s ->
(match tag with
| None -> DTDNotMatched
| Some t when t = String.uppercase s -> DTDMatched
| Some _ -> DTDNotMatched)
| DTDPCData ->
(match tag with
| None -> DTDMatched
| Some _ -> DTDNotMatched)
| DTDOptional x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNext
| DTDMatched
| DTDMatchedResult _ -> DTDMatched)
| DTDZeroOrMore x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNext
| DTDMatched
| DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x))
| DTDOneOrMore x ->
(match update x with
| DTDNotMatched
| DTDNext -> DTDNotMatched
| DTDMatched
| DTDMatchedResult _ -> DTDMatchedResult (DTDZeroOrMore x))
| DTDChoice l ->
(try
(match List.exists (fun x ->
match update x with
| DTDMatched -> true
| DTDMatchedResult _ as r -> raise (TmpResult r)
| DTDNext | DTDNotMatched -> false) l with
| true -> DTDMatched
| false -> DTDNotMatched)
with
TmpResult r -> r)
| DTDChildren [] -> assert false (* DTD is checked ! *)
| DTDChildren (h :: t) ->
(match update h with
| DTDNext ->
(match t with
| [] -> DTDNotMatched
| _ -> update (DTDChildren t))
| DTDNotMatched -> DTDNotMatched
| DTDMatchedResult r ->
DTDMatchedResult (DTDChildren (r::t))
| DTDMatched ->
match t with
| [] -> DTDMatched
| _ -> DTDMatchedResult (DTDChildren t))
in
match update elt with
| DTDNext | DTDNotMatched ->
(match tag with
| None -> raise (Prove_error UnexpectedPCData)
| Some t -> raise (Prove_error (UnexpectedTag t)))
| DTDMatched ->
dtd.current <- DTDEmpty
| DTDMatchedResult r ->
dtd.current <- DTDChild r

let is_nmtoken_char = function
| 'A'..'Z' | 'a'..'z' | '0'..'9' | '.' | '-' | '_' | ':' -> true
| _ -> false

let prove_attrib dtd attr aname (atype,adef) accu =
let aval = (try Some (List.assoc aname attr) with Not_found -> None) in
(match atype, aval with
| DTDCData, _ -> ()
| DTDNMToken, None -> ()
| DTDNMToken, Some v ->
for i = 0 to String.length v - 1 do
if not (is_nmtoken_char v.[i]) then raise (Prove_error (InvalidAttributeValue aname));
done
| DTDEnum l, None -> ()
| DTDEnum l, Some v ->
if not (List.exists ((=) v) l) then raise (Prove_error (InvalidAttributeValue aname)));
match adef, aval with
| DTDRequired, None -> raise (Prove_error (RequiredAttribute aname))
| DTDFixed v, Some av when v <> av -> raise (Prove_error (InvalidAttributeValue aname))
| DTDImplied, None -> accu
| DTDFixed v , None
| DTDDefault _, Some v
| DTDDefault v, None
| DTDRequired, Some v
| DTDImplied, Some v
| DTDFixed _, Some v -> (aname,v) :: accu

let check_attrib ahash (aname,_) =
try
ignore(Hashtbl.find ahash aname);
with
Not_found -> raise (Prove_error (UnexpectedAttribute aname))

let rec do_prove dtd = function
| PCData s ->
prove_child dtd None;
PCData s
| Element (tag,attr,childs) ->
let utag = String.uppercase tag in
let uattr = List.map (fun (aname,aval) -> String.uppercase aname , aval) attr in
prove_child dtd (Some utag);
Stack.push (dtd.curtag,dtd.current) dtd.state;
let elt = (try Hashtbl.find dtd.elements utag with Not_found -> raise (Prove_error (UnexpectedTag tag))) in
let ahash = (try Hashtbl.find dtd.attribs utag with Not_found -> empty_hash) in
dtd.curtag <- tag;
dtd.current <- elt;
List.iter (check_attrib ahash) uattr;
let attr = Hashtbl.fold (prove_attrib dtd uattr) ahash [] in
let childs = ref (List.map (do_prove dtd) childs) in
(match dtd.current with
| DTDAny
| DTDEmpty -> ()
| DTDChild elt ->
let name = ref "" in
let rec check = function
| DTDTag t ->
name := t;
false
| DTDPCData when !childs = [] ->
childs := [PCData ""];
true
| DTDPCData ->
name := "#PCDATA";
false
| DTDOptional _ -> true
| DTDZeroOrMore _ -> true
| DTDOneOrMore e ->
ignore(check e);
false
| DTDChoice l -> List.exists check l
| DTDChildren l -> List.for_all check l
in
match check elt with
| true -> ()
| false -> raise (Prove_error (ChildExpected !name)));
let ctag, cur = Stack.pop dtd.state in
dtd.curtag <- tag;
dtd.current <- cur;
Element (tag,attr,!childs)

let prove dtd root xml =
do_prove (start_prove dtd root) xml

let parse_error_msg = function
| InvalidDTDDecl -> "Invalid DOCTYPE declaration"
| InvalidDTDElement -> "Invalid DTD element declaration"
| InvalidDTDAttribute -> "Invalid DTD attribute declaration"
| InvalidDTDTag -> "Invalid DTD tag"
| DTDItemExpected -> "DTD item expected"

let parse_error (msg,pos) =
let pos = (Obj.magic pos : error_pos) in
if pos.emin = pos.emax then
sprintf "%s line %d character %d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start)
else
sprintf "%s line %d characters %d-%d" (parse_error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)

let check_error = function
| ElementDefinedTwice tag -> sprintf "Element '%s' defined twice" tag
| AttributeDefinedTwice (tag,aname) -> sprintf "Attribute '%s' of element '%s' defined twice" aname tag
| ElementEmptyContructor tag -> sprintf "Element '%s' has empty constructor" tag
| ElementReferenced (tag,from) -> sprintf "Element '%s' referenced by '%s' is not declared" tag from
| ElementNotDeclared tag -> sprintf "Element '%s' needed but is not declared" tag

let prove_error = function
| UnexpectedPCData -> "Unexpected PCData"
| UnexpectedTag tag -> sprintf "Unexpected tag : '%s'" tag
| UnexpectedAttribute att -> sprintf "Unexpected attribute : '%s'" att
| InvalidAttributeValue att -> sprintf "Invalid attribute value for '%s'" att
| RequiredAttribute att -> sprintf "Required attribute not found : '%s'" att
| ChildExpected cname -> sprintf "Child expected : '%s'" cname
| EmptyExpected -> "No more children expected"

let to_string = function
| DTDAttribute (tag,aname,atype,adef) ->
let atype_to_string = function
| DTDCData -> "CDATA"
| DTDNMToken -> "NMTOKEN"
| DTDEnum l -> sprintf "(%s)" (String.concat "|" l)
in
let adefault_to_string = function
| DTDDefault s -> sprintf "\"%s\"" s
| DTDRequired -> "#REQUIRED"
| DTDImplied -> "#IMPLIED"
| DTDFixed s -> sprintf "#FIXED \"%s\"" s
in
sprintf "<!ATTLIST %s %s %s %s>" tag aname (atype_to_string atype) (adefault_to_string adef)
| DTDElement (tag,etype) ->
let rec echild_to_string = function
| DTDTag s -> s
| DTDPCData -> "#PCDATA"
| DTDOptional c -> sprintf "%s?" (echild_to_string c)
| DTDZeroOrMore c -> sprintf "%s*" (echild_to_string c)
| DTDOneOrMore c -> sprintf "%s+" (echild_to_string c)
| DTDChoice [c] -> echild_to_string c
| DTDChoice l -> sprintf "(%s)" (String.concat "|" (List.map echild_to_string l))
| DTDChildren [c] -> echild_to_string c
| DTDChildren l -> sprintf "(%s)" (String.concat "," (List.map echild_to_string l))
in
let etype_to_string = function
| DTDEmpty -> "EMPTY"
| DTDAny -> "ANY"
| DTDChild x ->
let rec op_to_string = function
| DTDOptional c -> sprintf "%s?" (op_to_string c)
| DTDZeroOrMore c -> sprintf "%s*" (op_to_string c)
| DTDOneOrMore c -> sprintf "%s+" (op_to_string c)
| _ -> ""
in
let rec root = function
| DTDOptional c
| DTDZeroOrMore c
| DTDOneOrMore c ->
root c
| DTDChoice [_]
| DTDChildren [_] as x ->
x, false
| DTDChoice _
| DTDChildren _ as x ->
x, true
| x -> x, false
in
match root x with
| r, true -> sprintf "%s%s" (echild_to_string r) (op_to_string x)
| r, false -> sprintf "(%s%s)" (echild_to_string r) (op_to_string x)
in
sprintf "<!ELEMENT %s %s>" tag (etype_to_string etype)

;;
to_string_ref := to_stringocaml-xml-light-2.2/dtd.mli000064400000000000000000000127761167637302700156600ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

(** Xml Light DTD

This module provide several functions to create, check, and use DTD
to prove Xml documents : {ul
{li using the DTD types, you can directly create your own DTD structure}
{li the {!Dtd.check} function can then be used to check that all DTD
states have been declared, that no attributes are declared twice,
and so on.}
{li the {!Dtd.prove} function can be used to check an {!Xml} data
structure with a checked DTD. The function will return the
expanded Xml document or raise an exception if the DTD proving
fails.}
}

{i Note about ENTITIES:}

While parsing Xml, PCDATA is always parsed and
the Xml entities &amp; &gt; &lt; &apos; &quot; are replaced by their
corresponding ASCII characters. For Xml attributes, theses can be
put between either double or simple quotes, and the backslash character
can be used to escape inner quotes. There is no support for CDATA Xml
nodes or PCDATA attributes declarations in DTD, and no support for
user-defined entities using the ENTITY DTD element.
*)

(** {6 The DTD Types} *)

type dtd_child =
| DTDTag of string
| DTDPCData
| DTDOptional of dtd_child
| DTDZeroOrMore of dtd_child
| DTDOneOrMore of dtd_child
| DTDChoice of dtd_child list
| DTDChildren of dtd_child list

type dtd_element_type =
| DTDEmpty
| DTDAny
| DTDChild of dtd_child

type dtd_attr_default =
| DTDDefault of string
| DTDRequired
| DTDImplied
| DTDFixed of string

type dtd_attr_type =
| DTDCData
| DTDNMToken
| DTDEnum of string list

type dtd_item =
| DTDAttribute of string * string * dtd_attr_type * dtd_attr_default
| DTDElement of string * dtd_element_type

type dtd = dtd_item list

type checked

(** {6 The DTD Functions} *)

(** Parse the named file into a Dtd data structure. Raise
{!Xml.File_not_found} if an error occured while opening the file.
Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_file : string -> dtd

(** Read the content of the in_channel and parse it into a Dtd data
structure. Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_in : in_channel -> dtd

(** Parse the string containing a Dtd document into a Dtd data
structure. Raise {!Dtd.Parse_error} if parsing failed. *)
val parse_string : string -> dtd

(** Check the Dtd data structure declaration and return a checked
DTD. Raise {!Dtd.Check_error} if the DTD checking failed. *)
val check : dtd -> checked

(** Prove an Xml document using a checked DTD and an entry point.
The entry point is the first excepted tag of the Xml document,
the returned Xml document has the same structure has the original
one, excepted that non declared optional attributes have been set
to their default value specified in the DTD.
Raise {!Dtd.Check_error} [ElementNotDeclared] if the entry point
is not found, raise {!Dtd.Prove_error} if the Xml document failed
to be proved with the DTD. *)
val prove : checked -> string -> Xml.xml -> Xml.xml

(** Print a DTD element into a string. You can easily get a DTD
document from a DTD data structure using for example
[String.concat "\n" (List.map Dtd.to_string) my_dtd] *)
val to_string : dtd_item -> string

(** {6 The DTD Exceptions} *)

(** There is three types of DTD excecptions : {ul
{li {!Dtd.Parse_error} is raised when an error occured while
parsing a DTD document into a DTD data structure.}
{li {!Dtd.Check_error} is raised when an error occured while
checking a DTD data structure for completeness, or when the
prove entry point is not found when calling {!Dtd.prove}.}
{li {!Dtd.Prove_error} is raised when an error occured while
proving an Xml document.}
}

Several string conversion functions are provided to enable you
to report errors to the user.
*)

type parse_error_msg =
| InvalidDTDDecl
| InvalidDTDElement
| InvalidDTDAttribute
| InvalidDTDTag
| DTDItemExpected

type check_error =
| ElementDefinedTwice of string
| AttributeDefinedTwice of string * string
| ElementEmptyContructor of string
| ElementReferenced of string * string
| ElementNotDeclared of string

type prove_error =
| UnexpectedPCData
| UnexpectedTag of string
| UnexpectedAttribute of string
| InvalidAttributeValue of string
| RequiredAttribute of string
| ChildExpected of string
| EmptyExpected

type parse_error = parse_error_msg * Xml.error_pos

exception Parse_error of parse_error
exception Check_error of check_error
exception Prove_error of prove_error

val parse_error : parse_error -> string
val check_error : check_error -> string
val prove_error : prove_error -> string

(**/**)

(* internal usage only... *)
val _raises : (string -> exn) -> unitocaml-xml-light-2.2/test.ml000064400000000000000000000035311167637302700157000ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

open Xml
open Dtd

let parse data =
match data.[0] with
| '#' -> Xml.parse_file (String.sub data 1 ((String.length data)-2))
| _ -> Xml.parse_string data

;;
let buf = ref "" in
print_endline "Please enter some XML data followed (press return twice to parse) :";
try
while true do
match read_line() with
| "" when !buf <> "" ->
let data = !buf in
buf := "";
(try
let x = parse data in
print_endline "Parsing...";
print_endline (Xml.to_string_fmt x);
with
| Xml.Error msg as e ->
Printf.printf "Xml error : %s\n" (Xml.error msg)
| Dtd.Parse_error msg as e ->
Printf.printf "Dtd parse error : %s\n" (Dtd.parse_error msg)
| Dtd.Check_error msg as e ->
Printf.printf "Dtd check error : %s\n" (Dtd.check_error msg)
| Dtd.Prove_error msg as e ->
Printf.printf "Dtd prove error : %s\n" (Dtd.prove_error msg))
| s ->
buf := !buf ^ s ^ "\n"
done
with
End_of_file -> print_endline "Exit."ocaml-xml-light-2.2/xml.ml000064400000000000000000000162341167637302700155250ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

open Printf

type xml =
| Element of (string * (string * string) list * xml list)
| PCData of string

type error_pos = {
eline : int;
eline_start : int;
emin : int;
emax : int;
}

type error_msg =
| UnterminatedComment
| UnterminatedString
| UnterminatedEntity
| IdentExpected
| CloseExpected
| NodeExpected
| AttributeNameExpected
| AttributeValueExpected
| EndOfTagExpected of string
| EOFExpected

type error = error_msg * error_pos

exception Error of error
exception File_not_found of string

exception Not_element of xml
exception Not_pcdata of xml
exception No_attribute of string

let default_parser = XmlParser.make()

let pos source =
let line, lstart, min, max = Xml_lexer.pos source in
{
eline = line;
eline_start = lstart;
emin = min;
emax = max;
}

let parse (p:XmlParser.t) (source:XmlParser.source) =
(* local cast Xml.xml -> xml *)
(Obj.magic XmlParser.parse p source : xml)

let parse_in ch = parse default_parser (XmlParser.SChannel ch)
let parse_string str = parse default_parser (XmlParser.SString str)

let parse_file f =
let p = XmlParser.make() in
let path = Filename.dirname f in
XmlParser.resolve p (fun file ->
let name = (match path with "." -> file | _ -> path ^ "/" ^ file) in
Dtd.check (Dtd.parse_file name)
);
parse p (XmlParser.SFile f)

let error_msg = function
| UnterminatedComment -> "Unterminated comment"
| UnterminatedString -> "Unterminated string"
| UnterminatedEntity -> "Unterminated entity"
| IdentExpected -> "Ident expected"
| CloseExpected -> "Element close expected"
| NodeExpected -> "Xml node expected"
| AttributeNameExpected -> "Attribute name expected"
| AttributeValueExpected -> "Attribute value expected"
| EndOfTagExpected tag -> sprintf "End of tag expected : '%s'" tag
| EOFExpected -> "End of file expected"

let error (msg,pos) =
if pos.emin = pos.emax then
sprintf "%s line %d character %d" (error_msg msg) pos.eline (pos.emin - pos.eline_start)
else
sprintf "%s line %d characters %d-%d" (error_msg msg) pos.eline (pos.emin - pos.eline_start) (pos.emax - pos.eline_start)

let line e = e.eline

let range e =
e.emin - e.eline_start , e.emax - e.eline_start

let abs_range e =
e.emin , e.emax

let tag = function
| Element (tag,_,_) -> tag
| x -> raise (Not_element x)

let pcdata = function
| PCData text -> text
| x -> raise (Not_pcdata x)

let attribs = function
| Element (_,attr,_) -> attr
| x -> raise (Not_element x)

let attrib x att =
match x with
| Element (_,attr,_) ->
(try
let att = String.lowercase att in
snd (List.find (fun (n,_) -> String.lowercase n = att) attr)
with
Not_found ->
raise (No_attribute att))
| x ->
raise (Not_element x)

let children = function
| Element (_,_,clist) -> clist
| x -> raise (Not_element x)

(*let enum = function
| Element (_,_,clist) -> List.to_enum clist
| x -> raise (Not_element x)
*)

let iter f = function
| Element (_,_,clist) -> List.iter f clist
| x -> raise (Not_element x)

let map f = function
| Element (_,_,clist) -> List.map f clist
| x -> raise (Not_element x)

let fold f v = function
| Element (_,_,clist) -> List.fold_left f v clist
| x -> raise (Not_element x)

let tmp = Buffer.create 200

let buffer_pcdata text =
let l = String.length text in
for p = 0 to l-1 do
match text.[p] with
| '>' -> Buffer.add_string tmp "&gt;"
| '<' -> Buffer.add_string tmp "&lt;"
| '&' ->
if p < l-1 && text.[p+1] = '#' then
Buffer.add_char tmp '&'
else
Buffer.add_string tmp "&amp;"
| '\'' -> Buffer.add_string tmp "&apos;"
| '"' -> Buffer.add_string tmp "&quot;"
| c -> Buffer.add_char tmp c
done

let buffer_attr (n,v) =
Buffer.add_char tmp ' ';
Buffer.add_string tmp n;
Buffer.add_string tmp "=\"";
let l = String.length v in
for p = 0 to l-1 do
match v.[p] with
| '\\' -> Buffer.add_string tmp "\\\\"
| '"' -> Buffer.add_string tmp "\\\""
| c -> Buffer.add_char tmp c
done;
Buffer.add_char tmp '"'

let to_string x =
let pcdata = ref false in
let rec loop = function
| Element (tag,alist,[]) ->
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp "/>";
pcdata := false;
| Element (tag,alist,l) ->
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_char tmp '>';
pcdata := false;
List.iter loop l;
Buffer.add_string tmp "</";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
pcdata := false;
| PCData text ->
if !pcdata then Buffer.add_char tmp ' ';
buffer_pcdata text;
pcdata := true;
in
Buffer.reset tmp;
loop x;
let s = Buffer.contents tmp in
Buffer.reset tmp;
s

let to_string_fmt x =
let rec loop ?(newl=false) tab = function
| Element (tag,alist,[]) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp "/>";
if newl then Buffer.add_char tmp '\n';
| Element (tag,alist,[PCData text]) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp ">";
buffer_pcdata text;
Buffer.add_string tmp "</";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
if newl then Buffer.add_char tmp '\n';
| Element (tag,alist,l) ->
Buffer.add_string tmp tab;
Buffer.add_char tmp '<';
Buffer.add_string tmp tag;
List.iter buffer_attr alist;
Buffer.add_string tmp ">\n";
List.iter (loop ~newl:true (tab^" ")) l;
Buffer.add_string tmp tab;
Buffer.add_string tmp "</";
Buffer.add_string tmp tag;
Buffer.add_char tmp '>';
if newl then Buffer.add_char tmp '\n';
| PCData text ->
buffer_pcdata text;
if newl then Buffer.add_char tmp '\n';
in
Buffer.reset tmp;
loop "" x;
let s = Buffer.contents tmp in
Buffer.reset tmp;
s

;;
XmlParser._raises (fun x p ->
(* local cast : Xml.error_msg -> error_msg *)
Error ((Obj.magic x : error_msg),pos p))
(fun f -> File_not_found f)
(fun x p -> Dtd.Parse_error (x,
(* local cast : Xml.error_pos -> error_pos *)
(Obj.magic (pos p))));
Dtd._raises (fun f -> File_not_found f);ocaml-xml-light-2.2/xml.mli000064400000000000000000000134041167637302700156720ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

(** Xml Light

Xml Light is a minimal Xml parser & printer for OCaml.
It provide few functions to parse a basic Xml document into
an OCaml data structure and to print back the data structures
to an Xml document.

Xml Light has also support for {b DTD} (Document Type Definition).

{i (c)Copyright 2002-2003 Nicolas Cannasse}
*)

(** {6 Xml Data Structure} *)

(** An Xml node is either
[Element (tag-name, attributes, children)] or [PCData text] *)
type xml =
| Element of (string * (string * string) list * xml list)
| PCData of string

(** {6 Xml Parsing} *)

(** For easily parsing an Xml data source into an xml data structure,
you can use theses functions. But if you want advanced parsing usage,
please look at the {!XmlParser} module.
All the parsing functions can raise some exceptions, see the
{{:#exc}Exceptions} section for more informations. *)

(** Parse the named file into an Xml data structure. *)
val parse_file : string -> xml

(** Read the content of the in_channel and parse it into an Xml data
structure. *)
val parse_in : in_channel -> xml

(** Parse the string containing an Xml document into an Xml data
structure. *)
val parse_string : string -> xml

(** {6:exc Xml Exceptions} *)

(** Several exceptions can be raised when parsing an Xml document : {ul
{li {!Xml.Error} is raised when an xml parsing error occurs. the
{!Xml.error_msg} tells you which error occured during parsing
and the {!Xml.error_pos} can be used to retreive the document
location where the error occured at.}
{li {!Xml.File_not_found} is raised when and error occured while
opening a file with the {!Xml.parse_file} function or when a
DTD file declared by the Xml document is not found {i (see the
{!XmlParser} module for more informations on how to handle the
DTD file loading)}.}
}
If the Xml document is containing a DTD, then some other exceptions
can be raised, see the module {!Dtd} for more informations.
*)

type error_pos

type error_msg =
| UnterminatedComment
| UnterminatedString
| UnterminatedEntity
| IdentExpected
| CloseExpected
| NodeExpected
| AttributeNameExpected
| AttributeValueExpected
| EndOfTagExpected of string
| EOFExpected

type error = error_msg * error_pos

exception Error of error

exception File_not_found of string

(** Get a full error message from an Xml error. *)
val error : error -> string

(** Get the Xml error message as a string. *)
val error_msg : error_msg -> string

(** Get the line the error occured at. *)
val line : error_pos -> int

(** Get the relative character range (in current line) the error occured at.*)
val range : error_pos -> int * int

(** Get the absolute character range the error occured at. *)
val abs_range : error_pos -> int * int

(** {6 Xml Functions} *)

exception Not_element of xml
exception Not_pcdata of xml
exception No_attribute of string

(** [tag xdata] returns the tag value of the xml node.
Raise {!Xml.Not_element} if the xml is not an element *)
val tag : xml -> string

(** [pcdata xdata] returns the PCData value of the xml node.
Raise {!Xml.Not_pcdata} if the xml is not a PCData *)
val pcdata : xml -> string

(** [attribs xdata] returns the attribute list of the xml node.
First string if the attribute name, second string is attribute value.
Raise {!Xml.Not_element} if the xml is not an element *)
val attribs : xml -> (string * string) list

(** [attrib xdata "href"] returns the value of the ["href"]
attribute of the xml node (attribute matching is case-insensitive).
Raise {!Xml.No_attribute} if the attribute does not exists in the node's
attribute list
Raise {!Xml.Not_element} if the xml is not an element *)
val attrib : xml -> string -> string

(** [children xdata] returns the children list of the xml node
Raise {!Xml.Not_element} if the xml is not an element *)
val children : xml -> xml list

(*** [enum xdata] returns the children enumeration of the xml node
Raise {!Xml.Not_element} if the xml is not an element *)
(* val enum : xml -> xml Enum.t *)

(** [iter f xdata] calls f on all children of the xml node.
Raise {!Xml.Not_element} if the xml is not an element *)
val iter : (xml -> unit) -> xml -> unit

(** [map f xdata] is equivalent to [List.map f (Xml.children xdata)]
Raise {!Xml.Not_element} if the xml is not an element *)
val map : (xml -> 'a) -> xml -> 'a list

(** [fold f init xdata] is equivalent to
[List.fold_left f init (Xml.children xdata)]
Raise {!Xml.Not_element} if the xml is not an element *)
val fold : ('a -> xml -> 'a) -> 'a -> xml -> 'a

(** {6 Xml Printing} *)

(** Print the xml data structure into a compact xml string (without
any user-readable formating ). *)
val to_string : xml -> string

(** Print the xml data structure into an user-readable string with
tabs and lines break between different nodes. *)
val to_string_fmt : xml -> string
ocaml-xml-light-2.2/xmlParser.ml000064400000000000000000000121431167637302700166750ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
* Copyright (C) 2003 Jacques Garrigue
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

open Printf
open Dtd
open Xml

type t = {
mutable prove : bool;
mutable check_eof : bool;
mutable concat_pcdata : bool;
mutable resolve : (string -> checked);
}

type source =
| SFile of string
| SChannel of in_channel
| SString of string
| SLexbuf of Lexing.lexbuf

type state = {
source : Lexing.lexbuf;
stack : Xml_lexer.token Stack.t;
xparser : t;
}

exception Internal_error of Xml.error_msg
exception NoMoreData

let xml_error = ref (fun _ -> assert false)
let dtd_error = ref (fun _ -> assert false)
let file_not_found = ref (fun _ -> assert false)

let _raises e f d =
xml_error := e;
file_not_found := f;
dtd_error := d

let make () =
{
prove = true;
check_eof = true;
concat_pcdata = true;
resolve = (fun file -> raise (!file_not_found file))
}

let prove p v = p.prove <- v
let resolve p f = p.resolve <- f
let check_eof p v = p.check_eof <- v
let concat_pcdata p v = p.concat_pcdata <- v

let pop s =
try
Stack.pop s.stack
with
Stack.Empty ->
Xml_lexer.token s.source

let push t s =
Stack.push t s.stack

let rec read_node s =
match pop s with
| Xml_lexer.PCData s -> PCData s
| Xml_lexer.Tag (tag, attr, true) -> Element (tag, attr, [])
| Xml_lexer.Tag (tag, attr, false) -> Element (tag, attr, read_elems ~tag s)
| t ->
push t s;
raise NoMoreData
and
read_elems ?tag s =
let elems = ref [] in
(try
while true do
match s.xparser.concat_pcdata , read_node s , !elems with
| true , PCData c , (PCData c2) :: q ->
elems := PCData (sprintf "%s\n%s" c2 c) :: q
| _ , x , l ->
elems := x :: l
done
with
NoMoreData -> ());
match pop s with
| Xml_lexer.Endtag s when Some s = tag -> List.rev !elems
| Xml_lexer.Eof when tag = None -> List.rev !elems
| t ->
match tag with
| None -> raise (Internal_error EOFExpected)
| Some s -> raise (Internal_error (EndOfTagExpected s))

let read_xml s =
match s.xparser.prove, pop s with
| true, Xml_lexer.DocType (root, Xml_lexer.DTDFile file) ->
let pos = Xml_lexer.pos s.source in
let dtd = s.xparser.resolve file in
Xml_lexer.restore pos;
let x = read_node s in
Dtd.prove dtd root x
| true, Xml_lexer.DocType (root, Xml_lexer.DTDData dtd) ->
let dtd = Dtd.check dtd in
let x = read_node s in
Dtd.prove dtd root x
| false, Xml_lexer.DocType _ ->
read_node s
| _, t ->
push t s;
read_node s

let convert = function
| Xml_lexer.EUnterminatedComment -> UnterminatedComment
| Xml_lexer.EUnterminatedString -> UnterminatedString
| Xml_lexer.EIdentExpected -> IdentExpected
| Xml_lexer.ECloseExpected -> CloseExpected
| Xml_lexer.ENodeExpected -> NodeExpected
| Xml_lexer.EAttributeNameExpected -> AttributeNameExpected
| Xml_lexer.EAttributeValueExpected -> AttributeValueExpected
| Xml_lexer.EUnterminatedEntity -> UnterminatedEntity

let dtd_convert = function
| Xml_lexer.EInvalidDTDDecl -> InvalidDTDDecl
| Xml_lexer.EInvalidDTDTag -> InvalidDTDTag
| Xml_lexer.EDTDItemExpected -> DTDItemExpected
| Xml_lexer.EInvalidDTDElement -> InvalidDTDElement
| Xml_lexer.EInvalidDTDAttribute -> InvalidDTDAttribute

let do_parse xparser source =
try
Xml_lexer.init source;
let s = { source = source; xparser = xparser; stack = Stack.create(); } in
let x = read_xml s in
if xparser.check_eof && pop s <> Xml_lexer.Eof then raise (Internal_error EOFExpected);
Xml_lexer.close source;
x
with
| NoMoreData ->
Xml_lexer.close source;
raise (!xml_error NodeExpected source)
| Internal_error e ->
Xml_lexer.close source;
raise (!xml_error e source)
| Xml_lexer.Error e ->
Xml_lexer.close source;
raise (!xml_error (convert e) source)
| Xml_lexer.DTDError e ->
Xml_lexer.close source;
raise (!dtd_error (dtd_convert e) source)

let parse p = function
| SChannel ch -> do_parse p (Lexing.from_channel ch)
| SString str -> do_parse p (Lexing.from_string str)
| SLexbuf lex -> do_parse p lex
| SFile fname ->
let ch = (try open_in fname with Sys_error _ -> raise (!file_not_found fname)) in
try
let x = do_parse p (Lexing.from_channel ch) in
close_in ch;
x
with
e ->
close_in ch;
raise e
ocaml-xml-light-2.2/xmlParser.mli000064400000000000000000000066671167637302700170640ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

(** Xml Light Parser

While basic parsing functions can be used in the {!Xml} module, this module
is providing a way to create, configure and run an Xml parser.

*)

(** Abstract type for an Xml parser. *)
type t

(** Several kind of resources can contain Xml documents. *)
type source =
| SFile of string
| SChannel of in_channel
| SString of string
| SLexbuf of Lexing.lexbuf

(** This function returns a new parser with default options. *)
val make : unit -> t

(** This function enable or disable automatic DTD proving with the parser.
Note that Xml documents having no reference to a DTD are never proved
when parsed (but you can prove them later using the {!Dtd} module
{i (by default, prove is true)}. *)
val prove : t -> bool -> unit

(** When parsing an Xml document from a file using the {!Xml.parse_file}
function, the DTD file if declared by the Xml document has to be in the
same directory as the xml file. When using other parsing functions,
such as on a string or on a channel, the parser will raise everytime
{!Xml.File_not_found} if a DTD file is needed and prove enabled. To enable
the DTD loading of the file, the user have to configure the Xml parser
with a [resolve] function which is taking as argument the DTD filename and
is returning a checked DTD. The user can then implement any kind of DTD
loading strategy, and can use the {!Dtd} module functions to parse and check
the DTD file {i (by default, the resolve function is raising}
{!Xml.File_not_found}). *)
val resolve : t -> (string -> Dtd.checked) -> unit

(** When a Xml document is parsed, the parser will check that the end of the
document is reached, so for example parsing ["<A/><B/>"] will fail instead
of returning only the A element. You can turn off this check by setting
[check_eof] to [false] {i (by default, check_eof is true)}. *)
val check_eof : t -> bool -> unit

(** Once the parser is configurated, you can run the parser on a any kind
of xml document source to parse its contents into an Xml data structure. *)
val parse : t -> source -> Xml.xml

(** When several PCData elements are separed by a \n (or \r\n), you can
either split the PCData in two distincts PCData or merge them with \n
as seperator into one PCData. The default behavior is to concat the
PCData, but this can be changed for a given parser with this flag. *)
val concat_pcdata : t -> bool -> unit

(**/**)

(* internal usage only... *)
val _raises : (Xml.error_msg -> Lexing.lexbuf -> exn) -> (string -> exn) -> (Dtd.parse_error_msg -> Lexing.lexbuf -> exn) -> unitocaml-xml-light-2.2/xml_lexer.mli000064400000000000000000000033041167637302700170670ustar00rootroot00000000000000(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

type error =
| EUnterminatedComment
| EUnterminatedString
| EIdentExpected
| ECloseExpected
| ENodeExpected
| EAttributeNameExpected
| EAttributeValueExpected
| EUnterminatedEntity

type dtd_error =
| EInvalidDTDDecl
| EInvalidDTDTag
| EDTDItemExpected
| EInvalidDTDElement
| EInvalidDTDAttribute

exception Error of error
exception DTDError of dtd_error

type dtd_decl =
| DTDFile of string
| DTDData of Dtd.dtd

type token =
| Tag of string * (string * string) list * bool
| PCData of string
| Endtag of string
| DocType of (string * dtd_decl)
| Eof

type pos = int * int * int * int

val init : Lexing.lexbuf -> unit
val close : Lexing.lexbuf -> unit
val token : Lexing.lexbuf -> token
val dtd : Lexing.lexbuf -> Dtd.dtd
val pos : Lexing.lexbuf -> pos
val restore : pos -> unitocaml-xml-light-2.2/xml_lexer.mll000064400000000000000000000300651167637302700170760ustar00rootroot00000000000000{(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)

open Lexing
open Xml_parser
open Dtd

type error =
| EUnterminatedComment
| EUnterminatedString
| EIdentExpected
| ECloseExpected
| ENodeExpected
| EAttributeNameExpected
| EAttributeValueExpected
| EUnterminatedEntity

type dtd_error =
| EInvalidDTDDecl
| EInvalidDTDTag
| EDTDItemExpected
| EInvalidDTDElement
| EInvalidDTDAttribute

exception Error of error
exception DTDError of dtd_error

type pos = int * int * int * int

type dtd_decl =
| DTDFile of string
| DTDData of dtd

type dtd_item_type =
| TElement
| TAttribute

type token =
| Tag of string * (string * string) list * bool
| PCData of string
| Endtag of string
| DocType of (string * dtd_decl)
| Eof

let last_pos = ref 0
and current_line = ref 0
and current_line_start = ref 0

let tmp = Buffer.create 200

let idents = Hashtbl.create 0

let _ = begin
Hashtbl.add idents "gt;" ">";
Hashtbl.add idents "lt;" "<";
Hashtbl.add idents "amp;" "&";
Hashtbl.add idents "apos;" "'";
Hashtbl.add idents "quot;" "\"";
end

let init lexbuf =
current_line := 1;
current_line_start := lexeme_start lexbuf;
last_pos := !current_line_start

let close lexbuf =
Buffer.reset tmp

let pos lexbuf =
!current_line , !current_line_start ,
!last_pos ,
lexeme_start lexbuf

let restore (cl,cls,lp,_) =
current_line := cl;
current_line_start := cls;
last_pos := lp

let newline lexbuf =
incr current_line;
last_pos := lexeme_end lexbuf;
current_line_start := !last_pos

let error lexbuf e =
last_pos := lexeme_start lexbuf;
raise (Error e)

let dtd_error lexbuf e =
last_pos := lexeme_start lexbuf;
raise (DTDError e)
}

let newline = ['\n']
let break = ['\r']
let space = [' ' '\t']
let identchar = ['A'-'Z' 'a'-'z' '_' '0'-'9' ':' '-']
let entitychar = ['A'-'Z' 'a'-'z']
let pcchar = [^ '\r' '\n' '<' '>' '&']

rule token = parse
| newline
{
newline lexbuf;
token lexbuf
}
| (space | break) +
{
last_pos := lexeme_end lexbuf;
token lexbuf
}
| "<!DOCTYPE"
{
last_pos := lexeme_start lexbuf;
ignore_spaces lexbuf;
let root = ident_name lexbuf in
ignore_spaces lexbuf;
let data = dtd_data lexbuf in
DocType (root, data)
}
| "<!--"
{
last_pos := lexeme_start lexbuf;
comment lexbuf;
token lexbuf
}
| "<?"
{
last_pos := lexeme_start lexbuf;
header lexbuf;
token lexbuf;
}
| '<' space* '/' space*
{
last_pos := lexeme_start lexbuf;
let tag = ident_name lexbuf in
ignore_spaces lexbuf;
close_tag lexbuf;
Endtag tag
}
| '<' space*
{
last_pos := lexeme_start lexbuf;
let tag = ident_name lexbuf in
ignore_spaces lexbuf;
let attribs, closed = attributes lexbuf in
Tag(tag, attribs, closed)
}
| "&#"
{
last_pos := lexeme_start lexbuf;
Buffer.reset tmp;
Buffer.add_string tmp (lexeme lexbuf);
PCData (pcdata lexbuf)
}
| '&'
{
last_pos := lexeme_start lexbuf;
Buffer.reset tmp;
Buffer.add_string tmp (entity lexbuf);
PCData (pcdata lexbuf)
}
| space* pcchar+
{
last_pos := lexeme_start lexbuf;
Buffer.reset tmp;
Buffer.add_string tmp (lexeme lexbuf);
PCData (pcdata lexbuf)
}
| eof { Eof }
| _
{ error lexbuf ENodeExpected }

and ignore_spaces = parse
| newline
{
newline lexbuf;
ignore_spaces lexbuf
}
| (space | break) +
{ ignore_spaces lexbuf }
| ""
{ () }

and comment = parse
| newline
{
newline lexbuf;
comment lexbuf
}
| "-->"
{ () }
| eof
{ raise (Error EUnterminatedComment) }
| _
{ comment lexbuf }

and header = parse
| newline
{
newline lexbuf;
header lexbuf
}
| "?>"
{ () }
| eof
{ error lexbuf ECloseExpected }
| _
{ header lexbuf }

and pcdata = parse
| pcchar+
{
Buffer.add_string tmp (lexeme lexbuf);
pcdata lexbuf
}
| "&#"
{
Buffer.add_string tmp (lexeme lexbuf);
pcdata lexbuf;
}
| '&'
{
Buffer.add_string tmp (entity lexbuf);
pcdata lexbuf
}
| ""
{ Buffer.contents tmp }

and entity = parse
| entitychar+ ';'
{
let ident = lexeme lexbuf in
try
Hashtbl.find idents (String.lowercase ident)
with
Not_found -> "&" ^ ident
}
| _ | eof
{ raise (Error EUnterminatedEntity) }

and ident_name = parse
| identchar+
{ lexeme lexbuf }
| _ | eof
{ error lexbuf EIdentExpected }

and close_tag = parse
| '>'
{ () }
| _ | eof
{ error lexbuf ECloseExpected }

and attributes = parse
| '>'
{ [], false }
| "/>"
{ [], true }
| "" (* do not read a char ! *)
{
let key = attribute lexbuf in
let data = attribute_data lexbuf in
ignore_spaces lexbuf;
let others, closed = attributes lexbuf in
(key, data) :: others, closed
}

and attribute = parse
| identchar+
{ lexeme lexbuf }
| _ | eof
{ error lexbuf EAttributeNameExpected }

and attribute_data = parse
| space* '=' space* '"'
{
Buffer.reset tmp;
last_pos := lexeme_end lexbuf;
dq_string lexbuf
}
| space* '=' space* '\''
{
Buffer.reset tmp;
last_pos := lexeme_end lexbuf;
q_string lexbuf
}
| _ | eof
{ error lexbuf EAttributeValueExpected }

and dq_string = parse
| '"'
{ Buffer.contents tmp }
| '\\' [ '"' '\\' ]
{
Buffer.add_char tmp (lexeme_char lexbuf 1);
dq_string lexbuf
}
| eof
{ raise (Error EUnterminatedString) }
| _
{
Buffer.add_char tmp (lexeme_char lexbuf 0);
dq_string lexbuf
}

and q_string = parse
| '\''
{ Buffer.contents tmp }
| '\\' [ '\'' '\\' ]
{
Buffer.add_char tmp (lexeme_char lexbuf 1);
q_string lexbuf
}
| eof
{ raise (Error EUnterminatedString) }
| _
{
Buffer.add_char tmp (lexeme_char lexbuf 0);
q_string lexbuf
}

and dtd_data = parse
| "PUBLIC"
{
ignore_spaces lexbuf;
(* skipping Public ID *)
let _ = dtd_file lexbuf in
let file = dtd_file lexbuf in
dtd_end_decl lexbuf;
DTDFile file
}
| "SYSTEM"
{
ignore_spaces lexbuf;
let file = dtd_file lexbuf in
dtd_end_decl lexbuf;
DTDFile file
}
| '['
{
ignore_spaces lexbuf;
let data = dtd_intern lexbuf in
dtd_end_decl lexbuf;
DTDData data
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }

and dtd_file = parse
| '"'
{
Buffer.reset tmp;
let s = dq_string lexbuf in
ignore_spaces lexbuf;
s
}
| '\''
{
Buffer.reset tmp;
let s = q_string lexbuf in
ignore_spaces lexbuf;
s
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }

and dtd_intern = parse
| ']'
{
ignore_spaces lexbuf;
[]
}
| ""
{
let l = dtd_item lexbuf in
l @ (dtd_intern lexbuf)
}

and dtd = parse
| eof
{ [] }
| newline
{
newline lexbuf;
dtd lexbuf
}
| (space | break)+
{ dtd lexbuf }
| ""
{
let l = dtd_item lexbuf in
l @ (dtd lexbuf)
}

and dtd_end_decl = parse
| '>'
{ ignore_spaces lexbuf }
| _ | eof
{ dtd_error lexbuf EInvalidDTDDecl }

and dtd_item = parse
| "<!--"
{
comment lexbuf;
[];
}
| "<!"
{
ignore_spaces lexbuf;
let t = dtd_item_type lexbuf in
let name = (try ident_name lexbuf with Error EIdentExpected -> raise (DTDError EInvalidDTDDecl)) in
ignore_spaces lexbuf;
match t with
| TElement -> [ DTDElement (name , (dtd_element_type lexbuf)) ]
| TAttribute -> List.map (fun (attrname,atype,adef) -> DTDAttribute (name, attrname, atype, adef)) (dtd_attributes lexbuf)
}
| _ | eof
{ dtd_error lexbuf EDTDItemExpected }

and dtd_attributes = parse
| '>'
{
ignore_spaces lexbuf;
[]
}
| ""
{
let attrname = (try ident_name lexbuf with Error EIdentExpected -> raise (DTDError EInvalidDTDAttribute)) in
ignore_spaces lexbuf;
let atype = dtd_attr_type lexbuf in
let adef = dtd_attr_default lexbuf in
let a = (attrname, atype, adef) in
a :: (dtd_attributes lexbuf)
}

and dtd_item_type = parse
| "ELEMENT"
{
ignore_spaces lexbuf;
TElement
}
| "ATTLIST"
{
ignore_spaces lexbuf;
TAttribute
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDTag }

and dtd_element_type = parse
| "ANY"
{
ignore_spaces lexbuf;
dtd_end_element lexbuf;
DTDAny
}
| "EMPTY"
{
ignore_spaces lexbuf;
dtd_end_element lexbuf;
DTDEmpty
}
| '('
{
try
let item = Xml_parser.dtd_element dtd_element_token lexbuf in
ignore_spaces lexbuf;
DTDChild item
with
Parsing.Parse_error -> dtd_error lexbuf EInvalidDTDElement
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDElement }

and dtd_end_element = parse
| '>'
{ ignore_spaces lexbuf }
| _ | eof
{ dtd_error lexbuf EInvalidDTDElement }

and dtd_end_attribute = parse
| '>'
{ ignore_spaces lexbuf }
| _ | eof
{ dtd_error lexbuf EInvalidDTDAttribute }

and dtd_element_token = parse
| newline
{
newline lexbuf;
dtd_element_token lexbuf
}
| (space | break) +
{ dtd_element_token lexbuf }
| '('
{ OPEN }
| ')'
{ CLOSE }
| ','
{ NEXT }
| '>'
{ END }
| '|'
{ OR }
| "#PCDATA"
{ PCDATA }
| '*'
{ STAR }
| '+'
{ PLUS }
| '?'
{ QUESTION }
| identchar+
{ IDENT (lexeme lexbuf) }
| _ | eof
{ dtd_error lexbuf EInvalidDTDElement }

and dtd_attr_type = parse
| "CDATA"
{
ignore_spaces lexbuf;
DTDCData
}
| "NMTOKEN"
{
ignore_spaces lexbuf;
DTDNMToken
}
| '('
{
ignore_spaces lexbuf;
DTDEnum (dtd_attr_enum lexbuf)
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDAttribute }

and dtd_attr_enum = parse
| identchar+
{
let v = lexeme lexbuf in
ignore_spaces lexbuf;
v :: (dtd_attr_enum_next lexbuf)
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDAttribute }

and dtd_attr_enum_next = parse
| ')'
{
ignore_spaces lexbuf;
[]
}
| '|'
{
ignore_spaces lexbuf;
dtd_attr_enum lexbuf
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDAttribute }

and dtd_attr_default = parse
| '"'
{
Buffer.reset tmp;
let v = (try dq_string lexbuf with Error EUnterminatedString -> raise (DTDError EInvalidDTDAttribute)) in
ignore_spaces lexbuf;
DTDDefault v
}
| '\''
{
Buffer.reset tmp;
let v = (try q_string lexbuf with Error EUnterminatedString -> raise (DTDError EInvalidDTDAttribute)) in
ignore_spaces lexbuf;
DTDDefault v
}
| "#REQUIRED"
{
ignore_spaces lexbuf;
DTDRequired
}
| "#IMPLIED"
{
ignore_spaces lexbuf;
DTDImplied
}
| "#FIXED"
{
ignore_spaces lexbuf;
DTDFixed (dtd_attr_string lexbuf)
}
| "#DEFAULT"
{
ignore_spaces lexbuf;
DTDDefault (dtd_attr_string lexbuf)
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDAttribute }

and dtd_attr_string = parse
| '"'
{
Buffer.reset tmp;
let v = (try dq_string lexbuf with Error EUnterminatedString -> raise (DTDError EInvalidDTDAttribute)) in
ignore_spaces lexbuf;
v
}
| '\''
{
Buffer.reset tmp;
let v = (try q_string lexbuf with Error EUnterminatedString -> raise (DTDError EInvalidDTDAttribute)) in
ignore_spaces lexbuf;
v
}
| _ | eof
{ dtd_error lexbuf EInvalidDTDAttribute }
ocaml-xml-light-2.2/xml_parser.mly000064400000000000000000000040011167637302700172570ustar00rootroot00000000000000%{(*
* Xml Light, an small Xml parser/printer with DTD support.
* Copyright (C) 2003 Nicolas Cannasse (ncannasse@motion-twin.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
%}
%token NEXT OR
%token <string>IDENT
%token PCDATA
%token STAR QUESTION PLUS
%token OPEN CLOSE
%token END

%right STAR QUESTION PLUS

%start dtd_element
%type <Dtd.dtd_child> dtd_element
%%

dtd_element:
| dtd_full_seq END
{ $1 }
;
dtd_full_seq:
| dtd_seq CLOSE dtd_op
{ $3 $1 }
| dtd_seq CLOSE
{ $1 }
;
dtd_seq:
| dtd_item NEXT dtd_children
{ Dtd.DTDChildren ($1 :: $3) }
| dtd_item OR dtd_choice
{ Dtd.DTDChoice ($1 :: $3) }
| dtd_item
{ $1 }
;
dtd_children:
| dtd_item NEXT dtd_children
{ $1 :: $3 }
| dtd_item
{ [$1] }
;
dtd_choice:
| dtd_item OR dtd_choice
{ $1 :: $3 }
| dtd_item
{ [$1] }
;
dtd_item:
| OPEN dtd_full_seq
{ $2 }
| dtd_member
{ $1 }
;
dtd_member:
| IDENT dtd_op
{ $2 (Dtd.DTDTag $1) }
| PCDATA dtd_op
{ $2 Dtd.DTDPCData }
| IDENT
{ Dtd.DTDTag $1 }
| PCDATA
{ Dtd.DTDPCData }
;
dtd_op:
| dtd_op_item dtd_op
{ (fun x -> $2 ($1 x)) }
| dtd_op_item
{ $1 }
;
dtd_op_item:
| STAR
{ (fun x -> Dtd.DTDZeroOrMore x) }
| QUESTION
{ (fun x -> Dtd.DTDOptional x) }
| PLUS
{ (fun x -> Dtd.DTDOneOrMore x) }
;
 
projeto & código: Vladimir Lettiev aka crux © 2004-2005, Andrew Avramenko aka liks © 2007-2008
mantenedor atual: Michael Shigorin
mantenedor da tradução: Fernando Martini aka fmartini © 2009