HTML-WikiConverter-MediaWiki-0.59/000075500000000000000000000000001147254762400166575ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/Changes000064400000000000000000000030361147254762400201540ustar00rootroot00000000000000# Change log for HTML::WikiConverter::MediaWiki version: 0.59 date: 2009-05-29 changes: - (bug #46453) fix bug in which was triggered too often - update readme documentation version: 0.58 date: 2009-03-06 changes: - fix manifest version: 0.57 date: 2009-03-06 changes: - no longer pad section headings by default - add new span/font tests, marked as TODO - add perl license to Makefile.PL version: 0.56 date: 2008-11-11 changes: - (bug #28402) add tbody, thead, font to passthrough_naked_tags - preserve image width in [[Image:]] markup - update 'table w/ blocks' test now that H::WC 0.63 properly supports nested blocks - now requires H::WC 0.63 (for the above test) - add author/license to META.yml version: 0.55 date: Sun Sep 17 11:00:00 EST 2006 changes: - (bug #21531) Added 'preserve_nowiki' attribute - Added 'preserve_templates' attribute (for the TinyMCE folks; see http://meta.wikimedia.org/wiki/TinyMCE) version: 0.54 date: Thu Sep 1 00:00:00 EST 2006 changes: - Added alternate text for [[Image:]] markup version: 0.53 date: Thu Jul 20 19:00:00 EST 2006 changes: -
within is now on own line (reported on [[wp:User_talk:Diberri]]) - require H::WC 0.60 version: 0.52 date: Wed June 07 16:00:00 EST 2006 changes: - (bug #19046) allow lone '0' in text - add pad_headings attribute - update to require H::WC 0.54 version: 0.51 date: Fri Mar 03 2006 changes: - update to require H::WC 0.52 version: 0.50 date: Tue Jan 10 2006 changes: - branched from main HTML::WikiConverter codebase. HTML-WikiConverter-MediaWiki-0.59/MANIFEST000064400000000000000000000003351147254762400200110ustar00rootroot00000000000000Changes MANIFEST META.yml # Will be created by "make dist" Makefile.PL README lib/HTML/WikiConverter/MediaWiki.pm t/00-load.t t/01-mediawiki.t t/boilerplate.t t/mediawiki.preserve.t t/pod-coverage.t t/pod.t t/runtests.pl HTML-WikiConverter-MediaWiki-0.59/META.yml000064400000000000000000000011211147254762400201230ustar00rootroot00000000000000--- #YAML:1.0 name: HTML-WikiConverter-MediaWiki version: 0.59 abstract: Convert HTML to MediaWiki markup author: - David J. Iberri license: perl distribution_type: module configure_requires: ExtUtils::MakeMaker: 0 requires: HTML::WikiConverter: 0.63 Test::More: 0 URI: 1.35 no_index: directory: - t - inc generated_by: ExtUtils::MakeMaker version 6.48 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 HTML-WikiConverter-MediaWiki-0.59/Makefile.PL000064400000000000000000000012601147254762400206300ustar00rootroot00000000000000use strict; use warnings; use ExtUtils::MakeMaker; WriteMakefile( NAME => 'HTML::WikiConverter::MediaWiki', AUTHOR => 'David J. Iberri ', VERSION_FROM => 'lib/HTML/WikiConverter/MediaWiki.pm', ABSTRACT_FROM => 'lib/HTML/WikiConverter/MediaWiki.pm', LICENSE => 'perl', PL_FILES => {}, PREREQ_PM => { 'Test::More' => 0, 'URI' => 1.35, 'HTML::WikiConverter' => 0.63, # for the 'table w/ blocks' test }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'HTML-WikiConverter-MediaWiki-*' }, ); HTML-WikiConverter-MediaWiki-0.59/README000064400000000000000000000026311147254762400175410ustar00rootroot00000000000000HTML::WikiConverter::MediaWiki ============================== HTML::WikiConverter::MediaWiki adds the MediaWiki dialect to HTML::WikiConverter allowing conversion from HTML to MediaWiki markup. SYNOPSIS Converting HTML to wiki markup is easy: use HTML::WikiConverter; my $wc = new HTML::WikiConverter( dialect => 'MediaWiki' ); print $wc->html2wiki( $html ); Or from the command line: % html2wiki --dialect MediaWiki input.html > output.wiki There's also a web interface if you're so inclined: http://toolserver.org/~diberri/cgi-bin/html2wiki/ INSTALLATION To install this module, run the following commands: perl Makefile.PL make make test make install SUPPORT AND DOCUMENTATION After installing, you can find documentation for this module with the perldoc command. perldoc HTML::WikiConverter::MediaWiki You can also look for information at: Search CPAN http://search.cpan.org/dist/HTML-WikiConverter-MediaWiki CPAN Request Tracker: http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-WikiConverter-MediaWiki AnnoCPAN, annotated CPAN documentation: http://annocpan.org/dist/HTML-WikiConverter-MediaWiki CPAN Ratings: http://cpanratings.perl.org/d/HTML-WikiConverter-MediaWiki COPYRIGHT AND LICENCE Copyright (c) David J. Iberri This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. HTML-WikiConverter-MediaWiki-0.59/lib/000075500000000000000000000000001147254762400174255ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/lib/HTML/000075500000000000000000000000001147254762400201715ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/lib/HTML/WikiConverter/000075500000000000000000000000001147254762400227645ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/lib/HTML/WikiConverter/MediaWiki.pm000064400000000000000000000342321147254762400251710ustar00rootroot00000000000000package HTML::WikiConverter::MediaWiki; use base 'HTML::WikiConverter'; use warnings; use strict; use URI; use File::Basename; use HTML::Tagset; our $VERSION = '0.59'; =head1 NAME HTML::WikiConverter::MediaWiki - Convert HTML to MediaWiki markup =head1 SYNOPSIS use HTML::WikiConverter; my $wc = new HTML::WikiConverter( dialect => 'MediaWiki' ); print $wc->html2wiki( $html ); =head1 DESCRIPTION This module contains rules for converting HTML into MediaWiki markup. See L for additional usage details. =head1 ATTRIBUTES In addition to the regular set of attributes recognized by the L constructor, this dialect also accepts the following attributes: =head2 preserve_bold Boolean indicating whether bold HTML elements should be preserved as HTML in the wiki output rather than being converted into MediaWiki markup. By default, EbE and EstrongE elements are converted to wiki markup identically. But sometimes you may wish EbE tags in the HTML to be preserved in the resulting MediaWiki markup. This attribute allows this. For example, if C is enabled, HTML like
  • Bold
  • Strong
will be converted to * Bold * '''Strong''' When disabled (the default), the preceding HTML markup would be converted into * '''Bold''' * '''Strong''' =head2 preserve_italic Boolean indicating whether italic HTML elements should be preserved as HTML in the wiki output rather than being converted into MediaWiki markup. For example, if C is enabled, HTML like
  • Italic
  • Emphasized
will be converted to * Italic * ''Emphasized'' When disabled (the default), the preceding HTML markup would be converted into * ''Italic'' * ''Emphasized'' =head2 preserve_templates Boolean indicating whether C<{{template}}> calls found in HTML should be preserved in the wiki markup. If disabled (the default), templates calls will be wrapped in CnowikiE> tags. =head2 preserve_nowiki Boolean indicating whether CnowikiE> tags found in HTML should be preserved in the wiki markup. If disabled (the default), nowiki tags will be replaced with their content. =head2 pad_headings Boolean indicating whether section headings should be padded with spaces (eg, "== Section ==" instead of "==Section=="). Default is false (ie, not to pad). =cut my @common_attrs = qw/ id class lang dir title style /; my @block_attrs = ( @common_attrs, 'align' ); my @tablealign_attrs = qw/ align char charoff valign /; my @tablecell_attrs = qw( abbr axis headers scope rowspan colspan nowrap width height bgcolor ); # Fix for bug 14527 my $pre_prefix = '[jsmckaoqkjgbhazkfpwijhkixh]'; sub rules { my $self = shift; my %rules = ( hr => { replace => "\n----\n" }, br => { preserve => 1, empty => 1, attributes => [ qw/id class title style clear/ ] }, p => { block => 1, trim => 'both', line_format => 'single' }, em => { start => "''", end => "''", line_format => 'single' }, strong => { start => "'''", end => "'''", line_format => 'single' }, i => { alias => 'em' }, b => { alias => 'strong' }, pre => { line_prefix => $pre_prefix, block => 1 }, table => { start => \&_table_start, end => "|}", block => 1, line_format => 'blocks' }, tr => { start => \&_tr_start }, td => { start => \&_td_start, end => "\n", trim => 'both', line_format => 'blocks' }, th => { start => \&_td_start, end => "\n", trim => 'both', line_format => 'single' }, caption => { start => \&_caption_start, end => "\n", line_format => 'single' }, img => { replace => \&_image }, a => { replace => \&_link }, ul => { line_format => 'multi', block => 1 }, ol => { alias => 'ul' }, dl => { alias => 'ul' }, li => { start => \&_li_start, trim => 'leading' }, dt => { alias => 'li' }, dd => { alias => 'li' }, # Preserved elements, from MediaWiki's Sanitizer.php (http://tinyurl.com/dzj6o) div => { preserve => 1, attributes => \@block_attrs }, span => { preserve => 1, attributes => \@block_attrs }, blockquote => { preserve => 1, attributes => [ @common_attrs, qw/ cite / ] }, del => { preserve => 1, attributes => [ @common_attrs, qw/ cite datetime / ] }, ins => { preserve => 1, attributes => [ @common_attrs, qw/ cite datetime / ] }, font => { preserve => 1, attributes => [ @common_attrs, qw/ size color face / ] }, # Headings (h1-h6) h1 => { start => \&_hr_start, end => \&_hr_end, block => 1, trim => 'both', line_format => 'single' }, h2 => { start => \&_hr_start, end => \&_hr_end, block => 1, trim => 'both', line_format => 'single' }, h3 => { start => \&_hr_start, end => \&_hr_end, block => 1, trim => 'both', line_format => 'single' }, h4 => { start => \&_hr_start, end => \&_hr_end, block => 1, trim => 'both', line_format => 'single' }, h5 => { start => \&_hr_start, end => \&_hr_end, block => 1, trim => 'both', line_format => 'single' }, h6 => { start => \&_hr_start, end => \&_hr_end, block => 1, trim => 'both', line_format => 'single' }, ); my @preserved = qw/ center cite code var sup sub tt big small strike s u ruby rb rt rp /; push @preserved, 'i' if $self->preserve_italic; push @preserved, 'b' if $self->preserve_bold; push @preserved, 'nowiki' if $self->preserve_nowiki; $rules{$_} = { preserve => 1, attributes => \@common_attrs } foreach @preserved; return \%rules; } sub attributes { { preserve_italic => { default => 0 }, preserve_bold => { default => 0 }, strip_tags => { default => [ qw/ head style script ~comment title meta link object / ] }, pad_headings => { default => 0 }, preserve_templates => { default => 0 }, preserve_nowiki => { default => 0 }, # see bug #28402 # xxx passthrough_naked_tags => { default => [ qw/ tbody thead font / ] }, passthrough_naked_tags => { default => [ qw/ tbody thead font span / ] }, } } sub _hr_start { my( $wc, $node, $subrules ) = @_; ( my $level = $node->tag ) =~ s/\D//g; my $affix = ('=') x $level; return $wc->pad_headings ? "$affix " : $affix; } sub _hr_end { my( $wc, $node, $subrules ) = @_; ( my $level = $node->tag ) =~ s/\D//g; my $affix = ('=') x $level; return $wc->pad_headings ? " $affix" : $affix; } sub postprocess_output { my( $self, $outref ) = @_; $$outref =~ s/\Q$pre_prefix\E/ /g; } # Calculates the prefix that will be placed before each list item. # Handles ordered, unordered, and definition list items. sub _li_start { my( $self, $node, $rules ) = @_; my @parent_lists = $node->look_up( _tag => qr/ul|ol|dl/ ); my $prefix = ''; foreach my $parent ( @parent_lists ) { my $bullet = ''; $bullet = '*' if $parent->tag eq 'ul'; $bullet = '#' if $parent->tag eq 'ol'; $bullet = ':' if $parent->tag eq 'dl'; $bullet = ';' if $parent->tag eq 'dl' and $node->tag eq 'dt'; $prefix = $bullet.$prefix; } return "\n$prefix "; } sub _link { my( $self, $node, $rules ) = @_; my $url = defined $node->attr('href') ? $node->attr('href') : ''; my $text = $self->get_elem_contents($node); # Handle internal links if( my $title = $self->get_wiki_page( $url ) ) { $title =~ s/_/ /g; return "[[$title]]" if $text eq $title; # no difference between link text and page title return "[[$text]]" if $text eq lcfirst $title; # differ by 1st char. capitalization return "[[$title|$text]]"; # completely different } # Treat them as external links return $url if $url eq $text; return "[$url $text]"; } sub _image { my( $self, $node, $rules ) = @_; return '' unless $node->attr('src'); my $alt = $node->attr('alt') || ''; my $img = basename( URI->new($node->attr('src'))->path ); my $width = $node->attr('width') || ''; return sprintf '[[Image:%s|%spx|%s]]', $img, $width, $alt if $alt and $width; return sprintf '[[Image:%s|%s]]', $img, $alt if $alt; return sprintf '[[Image:%s]]', $img; } sub _table_start { my( $self, $node, $rules ) = @_; my $prefix = '{|'; my @table_attrs = ( @common_attrs, qw/ summary width border frame rules cellspacing cellpadding align bgcolor frame rules / ); my $attrs = $self->get_attr_str( $node, @table_attrs ); $prefix .= ' '.$attrs if $attrs; return $prefix."\n"; } sub _tr_start { my( $self, $node, $rules ) = @_; my $prefix = '|-'; my @tr_attrs = ( @common_attrs, 'bgcolor', @tablealign_attrs ); my $attrs = $self->get_attr_str( $node, @tr_attrs ); $prefix .= ' '.$attrs if $attrs; return '' unless $node->left or $attrs; return $prefix."\n"; } # List of tags (and pseudo-tags, in the case of '~text') that are # considered phrasal elements. Any table cells that contain only these # elements will be placed on a single line. my @td_phrasals = qw/ i em b strong u tt code span font sup sub br ~text s strike del ins /; my %td_phrasals = map { $_ => 1 } @td_phrasals; sub _td_start { my( $self, $node, $rules ) = @_; my $prefix = $node->tag eq 'th' ? '!' : '|'; my @td_attrs = ( @common_attrs, @tablecell_attrs, @tablealign_attrs ); my $attrs = $self->get_attr_str( $node, @td_attrs ); $prefix .= ' '.$attrs.' |' if $attrs; # If there are any non-text elements inside the cell, then the # cell's content should start on its own line my @non_text = grep !$td_phrasals{$_->tag}, $node->content_list; my $space = @non_text ? "\n" : ' '; return $prefix.$space; } sub _caption_start { my( $self, $node, $rules ) = @_; my $prefix = '|+ '; my @caption_attrs = ( @common_attrs, 'align' ); my $attrs = $self->get_attr_str( $node, @caption_attrs ); $prefix .= $attrs.' |' if $attrs; return $prefix; } sub preprocess_node { my( $self, $node ) = @_; my $tag = defined $node->tag ? $node->tag : ''; $self->strip_aname($node) if $tag eq 'a'; $self->_strip_extra($node); $self->_nowiki_text($node) if $tag eq '~text'; # # XXX font-to-span convers # $node->tag('span') if $tag eq 'font'; } my $URL_PROTOCOLS = 'http|https|ftp|irc|gopher|news|mailto'; my $EXT_LINK_URL_CLASS = '[^]<>"\\x00-\\x20\\x7F]'; my $EXT_LINK_TEXT_CLASS = '[^\]\\x00-\\x1F\\x7F]'; # Text nodes matching one or more of these patterns will be enveloped # in and sub _wikitext_patterns { my $self = shift; # the caret in "qr/^/" seems redundant with "start_of_line" but both # are necessary my %wikitext_patterns = ( misc => { pattern => qr/^(?:\*|\#|\;|\:|\=|\!|\|)/m, location => 'start_of_line' }, italic => { pattern => qr/''/, location => 'anywhere' }, rule => { pattern => qr/^----/m, location => 'start_of_line' }, table => { pattern => qr/^\{\|/m, location => 'start_of_line' }, link => { pattern => qr/\[\[/m, location => 'anywhere' }, template => { pattern => qr/{{/m, location => 'anywhere' }, ); delete $wikitext_patterns{template} if $self->preserve_templates; return \%wikitext_patterns; } sub _nowiki_text { my( $self, $node ) = @_; my $text = defined $node->attr('text') ? $node->attr('text') : ''; return unless $text; my $wikitext_patterns = $self->_wikitext_patterns; my $found_nowiki_text = 0; ANYWHERE: { my @anywhere_patterns = map { $_->{pattern} } grep { $_->{location} eq 'anywhere' } values %$wikitext_patterns; $found_nowiki_text++ if $self->_match( $text, \@anywhere_patterns ); }; START_OF_LINE: { last if $found_nowiki_text; my @sol_patterns = map { $_->{pattern} } grep { $_->{location} eq 'start_of_line' } values %$wikitext_patterns; # find closest parent that is a block-level node my $nearest_parent_block = $self->elem_search_lineage( $node, { block => 1 } ); if( $nearest_parent_block ) { my $leftmostish_text_node = $self->_get_leftmostish_text_node( $nearest_parent_block ); if( $leftmostish_text_node and $node == $leftmostish_text_node ) { # I'm the first child in this block element, so let's apply start_of_line nowiki fixes $found_nowiki_text++ if $self->_match( $text, \@sol_patterns ); } } }; if( $found_nowiki_text ) { $text = "$text"; } else { $text =~ s~(\[\b(?:$URL_PROTOCOLS):$EXT_LINK_URL_CLASS+ *$EXT_LINK_TEXT_CLASS*?\])~$1~go; } $node->attr( text => $text ); } sub _get_leftmostish_text_node { my( $self, $node ) = @_; return unless $node; return $node if $node->tag eq '~text'; return $self->_get_leftmostish_text_node( ($node->content_list)[0] ) } sub _match { my( $self, $text, $patterns ) = @_; $text =~ $_ && return 1 for @$patterns; return 0; } my %extra = ( id => qr/catlinks/, class => qr/urlexpansion|printfooter|editsection/ ); # Delete ... et al sub _strip_extra { my( $self, $node ) = @_; my $tag = defined $node->tag ? $node->tag : ''; foreach my $att_name ( keys %extra ) { my $att_value = defined $node->attr($att_name) ? $node->attr($att_name) : ''; if( $att_value =~ $extra{$att_name} ) { $node->detach(); $node->delete(); return; } } } =head1 AUTHOR David J. Iberri, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc HTML::WikiConverter::MediaWiki You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 COPYRIGHT & LICENSE Copyright 2006 David J. Iberri, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; HTML-WikiConverter-MediaWiki-0.59/t/000075500000000000000000000000001147254762400171225ustar00rootroot00000000000000HTML-WikiConverter-MediaWiki-0.59/t/00-load.t000064400000000000000000000003101147254762400204350ustar00rootroot00000000000000#!perl -T use Test::More tests => 1; BEGIN { use_ok( 'HTML::WikiConverter::MediaWiki' ); } diag( "Testing HTML::WikiConverter::MediaWiki $HTML::WikiConverter::MediaWiki::VERSION, Perl $], $^X" ); HTML-WikiConverter-MediaWiki-0.59/t/01-mediawiki.t000064400000000000000000000327441147254762400215020ustar00rootroot00000000000000local $/; require 't/runtests.pl'; runtests( data => , dialect => 'MediaWiki', wiki_uri => 'http://www.test.com/wiki/' ); close DATA; __DATA__ external link __H__

[http://example.com]

__W__ [http://example.com [http://example.com]] __NEXT__ nowiki template __H__

mark stubs with {{stub}}

__W__ mark stubs with {{stub}} __NEXT__ nowiki quoted __H__

what happens to 'quoted text'?

__W__ what happens to 'quoted text'? __NEXT__ nowiki doubly quoted __H__

how about ''doubly quoted''?

__W__ how about ''doubly quoted''? __NEXT__ nowiki triply quoted __H__

and '''triply quoted'''?

__W__ and '''triply quoted'''? __NEXT__ nowiki hr __H__

----

__W__ ---- __NEXT__ nowiki ul __H__

* ul

__W__ * ul __NEXT__ nowiki ol __H__

# ol

__W__ # ol __NEXT__ nowiki def __H__

; def

__W__ ; def __NEXT__ nowiki indent __H__

: indent

__W__ : indent __NEXT__ nowiki internal links __H__

an [[internal]] link

__W__ an [[internal]] link __NEXT__ nowiki table markup __H__

{|
| table
|}

__W__ {|
| table
|} __NEXT__ nowiki ext link __H__

[http://example.com]

__W__ [http://example.com] __NEXT__ (bug #46453) triggering too often __H__ x:bla __W__ ''x'':bla __NEXT__ do not add a tag only if offending character(s) occur at the beginning of text node __H__

text *

text #

text ;

text :

text =

text !

text |

text ----

text {| __W__ text '''*''' text '''#''' text ''';''' text ''':''' text '''=''' text '''!''' text '''|''' text '''----''' text '''{|''' __NEXT__ tr attributes __H__
ok
__W__ {| |- align="left" valign="top" | ok |} __NEXT__ preserve cite __H__ text __W__ text __NEXT__ preserve var __H__ text __W__ text __NEXT__ preserve blockquote __H__

text
__W__
text
__NEXT__ preserve ruby __H__ text __W__ text __NEXT__ preserve rb __H__ text __W__ text __NEXT__ preserve rt __H__ text __W__ text __NEXT__ preserve rp __H__ text __W__ text __NEXT__ preserve div __H__
ok
__W__
ok
__NEXT__ empty line break __H__

__W__
__NEXT__ br attribs __H__ ok
__W__ ok
__NEXT__ wrap in html __H__ GOOGLE
NewLine __W__ [http://google.com GOOGLE]
NewLine __NEXT__ bold __H__ bold __W__ '''bold''' __NEXT__ italics __H__ italics __W__ ''italics'' __NEXT__ bold and italics __H__ bold and italics __W__ '''bold''' and ''italics'' __NEXT__ bold-italics nested __H__ bold-italics nested __W__ '''''bold-italics'' nested''' __NEXT__ strong __H__ strong __W__ '''strong''' __NEXT__ emphasized __H__ emphasized __W__ ''emphasized'' __NEXT__ underlined __H__ underlined __W__ underlined __NEXT__ strikethrough __H__ strike __W__ strike __NEXT__ deleted __H__ deleted text __W__ deleted text __NEXT__ inserted __H__ inserted __W__ inserted __NEXT__ span tags removed if naked (ie, have no attribs) __H__ text here __W__ text here __NEXT__ strip aname __H__ __W__ __NEXT__ one-line phrasals __H__ phrasals in one line __W__ ''phrasals in one line'' __NEXT__ paragraph blocking __H__

p1

p2

__W__ p1 p2 __NEXT__ lists __H__
  • 1
  • 2
__W__ * 1 * 2 __NEXT__ nested lists __H__
  • 1
    • 1a
    • 1b
  • 2
__W__ * 1 ** 1a ** 1b * 2 __NEXT__ nested lists (different types) __H__
  • 1
    • a
      1. i
    • b
  • 2
    indented
__W__ * 1 ** a **# i ** b * 2 *: indented __NEXT__ hr __H__
__W__ ---- __NEXT__ br __H__

stuff
stuff two

__W__ stuff
stuff two __NEXT__ div __H__
thing
__W__
thing
__NEXT__ div w/ attrs __H__
thing
__W__
thing
__NEXT__ sub __H__

H2O

__W__ H2O __NEXT__ sup __H__

x2

__W__ x2 __NEXT__ center __H__
centered text
__W__
centered text
__NEXT__ small __H__ small text __W__ small text __NEXT__ code __H__ $name = 'stan'; __W__ $name = 'stan'; __NEXT__ tt __H__ tt text __W__ tt text __NEXT__ font-to-span conversion ::TODO("HTML::WikiConverter::Normalizer not doing font-to-span conversion yet") __H__ font __W__ font __NEXT__ font __H__ font __W__ font __NEXT__ pre __H__
this
  is
    preformatted
      text
__W__ this is preformatted text __NEXT__ indent __H__
indented text
__W__ : indented text __NEXT__ nested indent __H__
stuff
double-indented
__W__ : stuff :: double-indented __NEXT__ h1 __H__

h1

__W__ =h1= __NEXT__ h2 __H__

h2

__W__ ==h2== __NEXT__ h3 __H__

h3

__W__ ===h3=== __NEXT__ h4 __H__

h4

__W__ ====h4==== __NEXT__ h5 __H__
h5
__W__ =====h5===== __NEXT__ h6 __H__
h6
__W__ ======h6====== __NEXT__ img __H__ __W__ [[Image:thing.gif]] __NEXT__ table __H__
Stuff
Name David
Age 24
Height 6'
Nested tables
are fun
__W__ {| |+ Stuff |- ! Name | David |- ! Age | 24 |- ! Height | 6' |- | {| | Nested | tables |- | are | fun |} |} __NEXT__ table w/ attrs __H__
Stuff
Name Foo
Age 24
Height 6'
__W__ {| border="1" cellpadding="3" bgcolor="#ffffff" |+ Stuff |- id="first" class="unselected" ! id="thing" bgcolor="black" | Name | Foo |- class="selected" ! Age | 24 |- class="unselected" ! Height | 6' |} __NEXT__ table w/ blocks __H__

Paragraph 1

Paragraph 2

__W__ {| | align="center" | Paragraph 1 Paragraph 2 |} __NEXT__ strip empty aname __H__ some text __W__ some text __NEXT__ wiki link (text == title) __H__ Some wiki page __W__ [[Some wiki page]] __NEXT__ wiki link (text case != title case) __H__ another page __W__ [[another page]] __NEXT__ wiki link (text != title) __H__ some text __W__ [[Another page|some text]] __NEXT__ external links __H__ thing __W__ [http://www.test.com thing] __NEXT__ external links (rel2abs) __H__ thing __W__ [http://www.test.com/thing.html thing] __NEXT__ strip urlexpansion __H__ Google (http://www.google.com) __W__ [http://www.google.com Google] __NEXT__ strip printfooter __H__
Retrieved from blah blah
__W__ __NEXT__ strip catlinks __H__ __W__ __NEXT__ strip editsection __H__ This is great __W__ This is great __NEXT__ escape bracketed urls __H__

This is a text node with what looks like an ext. link [http://example.org].

__W__ This is a text node with what looks like an ext. link [http://example.org]. __NEXT__ line with vertical bar __H__

| a line with a vertical bar

__W__ | a line with a vertical bar __NEXT__ line that starts with a bang __H__

! a line that starts with a bang

__W__ ! a line that starts with a bang __NEXT__ line that looks like a section __H__

= a line that looks like a section

__W__ = a line that looks like a section __NEXT__ pre-many (bug #14527) __H__
preformatted text

with spaces

should produce only one

pre-block
__W__ preformatted text with spaces should produce only one pre-block __NEXT__ pre following pre __H__
preformatted text
more preformatted text
once again
__W__ preformatted text more preformatted text once again __NEXT__ preserve ::preserve_bold(1) __H__ hello __W__ hello __NEXT__ hr under td __H__

__W__ {| | ---- |} __NEXT__ img alt __H__ Just a test __W__ [[Image:thing.gif|Just a test]] __NEXT__ no preserve templates ::preserve_templates(0) __H__ {{template}} __W__ {{template}} __NEXT__ preserve templates ::preserve_templates(1) {{template}} __W__ {{template}} __NEXT__ no preserve nowiki ::preserve_nowiki(0) __H__ hey __W__ hey __NEXT__ preserve nowiki ::preserve_nowiki(1) __H__ hey __W__ hey __NEXT__ preserve image width __H__ The Thing __W__ [[Image:thing.jpg|200px|The Thing]] __NEXT__ tbody and thead fixes (bug #28402) __H__
heading col 1 heading col 2 heading last col
data first col first row data c2 r1 data c3 r1
data c1 r2 data c2 r2 data c3 r2
data c1 r3 data c2 r3 data c3 r3
__W__ {| border="1" |- ! heading col 1 ! heading col 2 ! heading last col |- | data first col first row | data c2 r1 | data c3 r1 |- | data c1 r2 | data c2 r2 | data c3 r2 |- | data c1 r3 | data c2 r3 | data c3 r3 |} __NEXT__ don't pad headings ::pad_headings(0) __H__

Heading

__W__ ==Heading== __NEXT__ table with zeros __H__
0
1
0
1
__W__ {| | 0 |- | 1 |- | 0 |- | 1 |} __NEXT__ (bug #40845) internal links, without wiki_uri __H__ Class Browser __W__ [http://www.test.com/class_browser.html Class Browser] __NEXT__ (bug #40845) internal links, with wiki_uri=base_uri ::wiki_uri('http://www.test.com/') __H__ Class Browser __W__ [[class browser.html|Class Browser]] __NEXT__ (bug #40845) broken links with anchors, without wiki_uri __H__ adding __W__ [http://www.test.com#Adding adding] __NEXT__ (bug #40845) links with anchors, with wiki_uri ::wiki_uri('http://www.test.com/') ::TODO('wiki_uri not working with an ending slash') __H__ adding __W__ [[#Adding|adding]] __NEXT__ (bug #24745) font/span weirdness ::TODO("HTML::WikiConverter::Normalizer doesn't handle this yet"); __H__

The Test Header

__W__ The Test Header __NEXT__ (bug #29342) Tag attributes with 0 ::TODO("this is actually an H::WC-specific bug") __H__
HelloWorld
__W__ {| border="1" cellpadding="3" cellspacing="0" | Hello | World |} HTML-WikiConverter-MediaWiki-0.59/t/boilerplate.t000064400000000000000000000023431147254762400216130ustar00rootroot00000000000000#!perl -T use strict; use warnings; use Test::More tests => 3; sub not_in_file_ok { my ($filename, %regex) = @_; open my $fh, "<", $filename or die "couldn't open $filename for reading: $!"; my %violated; while (my $line = <$fh>) { while (my ($desc, $regex) = each %regex) { if ($line =~ $regex) { push @{$violated{$desc}||=[]}, $.; } } } if (%violated) { fail("$filename contains boilerplate text"); diag "$_ appears on lines @{$violated{$_}}" for keys %violated; } else { pass("$filename contains no boilerplate text"); } } not_in_file_ok(README => "The README is used..." => qr/The README is used/, "'version information here'" => qr/to provide version information/, ); not_in_file_ok(Changes => "placeholder date/time" => qr(Date/time) ); sub module_boilerplate_ok { my ($module) = @_; not_in_file_ok($module => 'the great new $MODULENAME' => qr/ - The great new /, 'boilerplate description' => qr/Quick summary of what the module/, 'stub function definition' => qr/function[12]/, ); } module_boilerplate_ok('lib/HTML/WikiConverter/MediaWiki.pm'); HTML-WikiConverter-MediaWiki-0.59/t/mediawiki.preserve.t000064400000000000000000000020001147254762400230740ustar00rootroot00000000000000local $/; require 't/runtests.pl'; runtests( data => , dialect => 'MediaWiki', minimal => 1, preserve_italic => 1, preserve_bold => 1 ); close DATA; __DATA__ preserve bold __H__ bold __W__ bold __NEXT__ preserve bold w/ attrs __H__ this __W__ this __NEXT__ preserve bold w/ bad attrs __H__ clickme __W__ clickme __NEXT__ convert strong __H__ strong __W__ '''strong''' __NEXT__ both strong/b __H__
  • bold
  • strong
__W__ * bold * '''strong''' __NEXT__ preserve italic __H__ italic __W__ italic __NEXT__ preserve italic w/ attrs __H__ italic __W__ italic __NEXT__ preserve italic w/ bad attrs __H__ clickme __W__ clickme __NEXT__ convert em __H__ em __W__ ''em'' __NEXT__ both em/i __H__
  • italic
  • em
__W__ * italic * ''em'' HTML-WikiConverter-MediaWiki-0.59/t/pod-coverage.t000064400000000000000000000010551147254762400216630ustar00rootroot00000000000000#!perl -T use Test::More; eval "use Test::Pod::Coverage 1.04"; plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@; all_pod_coverage_ok( { also_private => [ # These methods are documented in HTML::WikiConverter::Dialects qr/ get_elem_contents |get_wiki_page |get_attr_str |elem_within_block |is_camel_case |rule |rules |attribute |attributes |preprocess_tree |preprocess_node |postprocess_output |caption2para |strip_aname |base_url |wiki_url /x ] } ); HTML-WikiConverter-MediaWiki-0.59/t/pod.t000064400000000000000000000002141147254762400200660ustar00rootroot00000000000000#!perl -T use Test::More; eval "use Test::Pod 1.14"; plan skip_all => "Test::Pod 1.14 required for testing POD" if $@; all_pod_files_ok(); HTML-WikiConverter-MediaWiki-0.59/t/runtests.pl000064400000000000000000000066251147254762400213570ustar00rootroot00000000000000#!/usr/bin/perl use warnings; use strict; use Test::More; use File::Spec; use HTML::Entities; use HTML::WikiConverter; *e = \&encode_entities; my $more_tests = < comment __W__ A comment __NEXT__ strip head __H__ fun stuff

Crazy stuff here

__W__ Crazy stuff here __NEXT__ strip scripts __H__

benevolent text

__W__ benevolent text END_TESTS sub runtests { my %arg = @_; $arg{wrap_in_html} = 1; $arg{base_uri} ||= 'http://www.test.com'; my $minimal = $arg{minimal} || 0; my $data = $arg{data} || ''; $data .= entity_tests() . $more_tests unless $minimal; my @tests = split /__NEXT__\n/, $data; my $numtests = @tests; #$numtests += 1 unless $minimal; # file test plan tests => $numtests; # Delete unrecognized HTML::WikiConverter options delete $arg{$_} for qw/ data minimal /; my $wc = new HTML::WikiConverter(%arg); foreach my $test ( @tests ) { $test =~ s/^(.*?)\n//; my $name = $1; my( $html, $wiki ) = split /__W__\n/, $test; $html =~ s/__H__\n//; # $name =~ s{\s*\:\:(\w+\([^\)]*?\))}{ # my $method_call = $1; # eval "\$wc->$method_call;"; # die "Failed test call ($name): $@" if $@; # ''; # }ge; my( $todo, $todo_reason ); $name =~ s{\s*\:\:(\w+\([^\)]*?\))}{ my $keyword = $1; if( $keyword =~ /TODO\((\"|\')(.*?)\1/ ) { $todo = 1; $todo_reason = $2; } else { my $method_call = $keyword; eval "\$wc->$method_call;"; die "Failed test call ($name): $@" if $@; } ''; }ge; for( $html, $wiki ) { s/^\n+//; s/\n+$// } if( $todo ) { TODO: { local $TODO = $todo_reason; is( $wc->html2wiki($html), $wiki, $name ); } } else { is( $wc->html2wiki($html), $wiki, $name ); } } #file_test($wc) unless $minimal; } sub entity_tests { my $tmpl = "__NEXT__\n%s\n__H__\n%s\n__W__\n%s\n"; # test-name, html-input, expected-wiki-output my $data = ''; my @chars = ( '<', '>', '&' ); foreach my $char ( @chars ) { ( my $charname = e($char) ) =~ s/[&;]//g; $data .= sprintf $tmpl, "literal ($charname)", $char, e($char) . sprintf $tmpl, "encode ($charname)", e($char), e($char) . sprintf $tmpl, "meta ($charname)", e(e($char)), e(e($char)); } return $data; } sub _slurp { my $path = shift; open H, $path or die "couldn't open $path: $!"; local $/; my $c = ; close H; return $c; } sub file_test { my $wc = shift; my $lc_dialect = lc $wc->dialect; my $infile = File::Spec->catfile( 't', 'complete.html' ); my $outfile = File::Spec->catfile( 't', "complete.$lc_dialect" ); SKIP: { skip "Couldn't find $infile (ignore this)", 1 unless -e $infile; skip "Couldn't find $outfile (ignore this)", 1 unless -e $outfile; my( $got, $expect ) = ( $wc->html2wiki( file => $infile, slurp => 1 ), _slurp($outfile) ); for( $got, $expect ) { s/^\n+//; s/\n+$// } is( $got, $expect, 'read from file' ); }; } 1;