was triggered too often
- update readme documentation
version: 0.58
date: 2009-03-06
changes:
- fix manifest
version: 0.57
date: 2009-03-06
changes:
- no longer pad section headings by default
- add new span/font tests, marked as TODO
- add perl license to Makefile.PL
version: 0.56
date: 2008-11-11
changes:
- (bug #28402) add tbody, thead, font to passthrough_naked_tags
- preserve image width in [[Image:]] markup
- update 'table w/ blocks' test now that H::WC 0.63 properly supports nested blocks
- now requires H::WC 0.63 (for the above test)
- add author/license to META.yml
version: 0.55
date: Sun Sep 17 11:00:00 EST 2006
changes:
- (bug #21531) Added 'preserve_nowiki' attribute
- Added 'preserve_templates' attribute (for the TinyMCE folks; see http://meta.wikimedia.org/wiki/TinyMCE)
version: 0.54
date: Thu Sep 1 00:00:00 EST 2006
changes:
- Added alternate text for [[Image:]] markup
version: 0.53
date: Thu Jul 20 19:00:00 EST 2006
changes:
- <hr> within <td> is now on own line (reported on [[wp:User_talk:Diberri]])
- require H::WC 0.60
version: 0.52
date: Wed June 07 16:00:00 EST 2006
changes:
- (bug #19046) allow lone '0' in text
- add pad_headings attribute
- update to require H::WC 0.54
version: 0.51
date: Fri Mar 03 2006
changes:
- update to require H::WC 0.52
version: 0.50
date: Tue Jan 10 2006
changes:
- branched from main HTML::WikiConverter codebase.
HTML-WikiConverter-MediaWiki-0.59/MANIFEST 0000644 0000000 0000000 00000000335 11472547624 0020011 0 ustar 00root root 0000000 0000000 Changes
MANIFEST
META.yml # Will be created by "make dist"
Makefile.PL
README
lib/HTML/WikiConverter/MediaWiki.pm
t/00-load.t
t/01-mediawiki.t
t/boilerplate.t
t/mediawiki.preserve.t
t/pod-coverage.t
t/pod.t
t/runtests.pl
HTML-WikiConverter-MediaWiki-0.59/META.yml 0000644 0000000 0000000 00000001121 11472547624 0020123 0 ustar 00root root 0000000 0000000 --- #YAML:1.0
name: HTML-WikiConverter-MediaWiki
version: 0.59
abstract: Convert HTML to MediaWiki markup
author:
- David J. Iberri
license: perl
distribution_type: module
configure_requires:
ExtUtils::MakeMaker: 0
requires:
HTML::WikiConverter: 0.63
Test::More: 0
URI: 1.35
no_index:
directory:
- t
- inc
generated_by: ExtUtils::MakeMaker version 6.48
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: 1.4
HTML-WikiConverter-MediaWiki-0.59/Makefile.PL 0000644 0000000 0000000 00000001260 11472547624 0020630 0 ustar 00root root 0000000 0000000 use strict;
use warnings;
use ExtUtils::MakeMaker;
# Describe the distribution to ExtUtils::MakeMaker: the module name, where
# the version and abstract are read from, the license, and the prerequisites.
WriteMakefile(
NAME => 'HTML::WikiConverter::MediaWiki',
AUTHOR => 'David J. Iberri ',
# Version and abstract are both taken from the module file itself
VERSION_FROM => 'lib/HTML/WikiConverter/MediaWiki.pm',
ABSTRACT_FROM => 'lib/HTML/WikiConverter/MediaWiki.pm',
LICENSE => 'perl',
PL_FILES => {},
# Minimum versions of the modules this distribution depends on
PREREQ_PM => {
'Test::More' => 0,
'URI' => 1.35,
'HTML::WikiConverter' => 0.63, # for the 'table w/ blocks' test
},
# "make dist" produces a gzip-compressed tarball
dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
# "make clean" removes previously built distribution directories/tarballs
clean => { FILES => 'HTML-WikiConverter-MediaWiki-*' },
);
HTML-WikiConverter-MediaWiki-0.59/README 0000644 0000000 0000000 00000002631 11472547624 0017541 0 ustar 00root root 0000000 0000000 HTML::WikiConverter::MediaWiki
==============================
HTML::WikiConverter::MediaWiki adds the MediaWiki dialect to
HTML::WikiConverter allowing conversion from HTML to MediaWiki markup.
SYNOPSIS
Converting HTML to wiki markup is easy:
use HTML::WikiConverter;
my $wc = new HTML::WikiConverter( dialect => 'MediaWiki' );
print $wc->html2wiki( $html );
Or from the command line:
% html2wiki --dialect MediaWiki input.html > output.wiki
There's also a web interface if you're so inclined:
http://toolserver.org/~diberri/cgi-bin/html2wiki/
INSTALLATION
To install this module, run the following commands:
perl Makefile.PL
make
make test
make install
SUPPORT AND DOCUMENTATION
After installing, you can find documentation for this module with the
perldoc command.
perldoc HTML::WikiConverter::MediaWiki
You can also look for information at:
Search CPAN
http://search.cpan.org/dist/HTML-WikiConverter-MediaWiki
CPAN Request Tracker:
http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-WikiConverter-MediaWiki
AnnoCPAN, annotated CPAN documentation:
http://annocpan.org/dist/HTML-WikiConverter-MediaWiki
CPAN Ratings:
http://cpanratings.perl.org/d/HTML-WikiConverter-MediaWiki
COPYRIGHT AND LICENCE
Copyright (c) David J. Iberri
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
HTML-WikiConverter-MediaWiki-0.59/lib/ 0000755 0000000 0000000 00000000000 11472547624 0017425 5 ustar 00root root 0000000 0000000 HTML-WikiConverter-MediaWiki-0.59/lib/HTML/ 0000755 0000000 0000000 00000000000 11472547624 0020171 5 ustar 00root root 0000000 0000000 HTML-WikiConverter-MediaWiki-0.59/lib/HTML/WikiConverter/ 0000755 0000000 0000000 00000000000 11472547624 0022764 5 ustar 00root root 0000000 0000000 HTML-WikiConverter-MediaWiki-0.59/lib/HTML/WikiConverter/MediaWiki.pm 0000644 0000000 0000000 00000034232 11472547624 0025171 0 ustar 00root root 0000000 0000000 package HTML::WikiConverter::MediaWiki;
use base 'HTML::WikiConverter';
use warnings;
use strict;
use URI;
use File::Basename;
use HTML::Tagset;
our $VERSION = '0.59';
=head1 NAME
HTML::WikiConverter::MediaWiki - Convert HTML to MediaWiki markup
=head1 SYNOPSIS
use HTML::WikiConverter;
my $wc = new HTML::WikiConverter( dialect => 'MediaWiki' );
print $wc->html2wiki( $html );
=head1 DESCRIPTION
This module contains rules for converting HTML into MediaWiki
markup. See L<HTML::WikiConverter> for additional usage details.
=head1 ATTRIBUTES
In addition to the regular set of attributes recognized by the
L<HTML::WikiConverter> constructor, this dialect also accepts the
following attributes:
=head2 preserve_bold
Boolean indicating whether bold HTML elements should be preserved as
HTML in the wiki output rather than being converted into MediaWiki
markup.
By default, C<E<lt>bE<gt>> and C<E<lt>strongE<gt>> elements are converted to
wiki markup identically. But sometimes you may wish C<E<lt>bE<gt>> tags
in the HTML to be preserved in the resulting MediaWiki markup. This
attribute allows this.
For example, if C<preserve_bold> is enabled, HTML like
  <ul>
    <li> <b>Bold</b>
    <li> <strong>Strong</strong>
  </ul>
will be converted to
  * <b>Bold</b>
  * '''Strong'''
When disabled (the default), the preceding HTML markup would be
converted into
* '''Bold'''
* '''Strong'''
=head2 preserve_italic
Boolean indicating whether italic HTML elements should be preserved as
HTML in the wiki output rather than being converted into MediaWiki
markup.
For example, if C<preserve_italic> is enabled, HTML like
  <ul>
    <li> <i>Italic</i>
    <li> <em>Emphasized</em>
  </ul>
will be converted to
  * <i>Italic</i>
  * ''Emphasized''
When disabled (the default), the preceding HTML markup would be
converted into
* ''Italic''
* ''Emphasized''
=head2 preserve_templates
Boolean indicating whether C<{{template}}> calls found in HTML should
be preserved in the wiki markup. If disabled (the default), templates
calls will be wrapped in C<E<lt>nowikiE<gt>> tags.
=head2 preserve_nowiki
Boolean indicating whether C<E<lt>nowikiE<gt>> tags found in HTML
should be preserved in the wiki markup. If disabled (the default),
nowiki tags will be replaced with their content.
=head2 pad_headings
Boolean indicating whether section headings should be padded with
spaces (eg, "== Section ==" instead of "==Section=="). Default is
false (ie, not to pad).
=cut
# Attribute-name lists shared by the rules table and the table-markup
# helpers in this file.
my @common_attrs = qw/ id class lang dir title style /;
# Common attributes plus 'align'
my @block_attrs = ( @common_attrs, 'align' );
# Alignment attributes used for table rows and cells
my @tablealign_attrs = qw/ align char charoff valign /;
# Attributes specific to table cells
my @tablecell_attrs = qw(
abbr axis headers scope rowspan
colspan nowrap width height bgcolor
);
# Fix for bug 14527
# Placeholder used as the line prefix of 'pre' content in rules();
# postprocess_output() replaces each occurrence with a single space.
my $pre_prefix = '[jsmckaoqkjgbhazkfpwijhkixh]';
# Builds the table of conversion rules for this dialect.
#
# Returns a hashref keyed by HTML tag name; each value is a hashref of
# rule options (start/end/replace handlers, line formatting, aliases,
# or "preserve" with a list of attributes to keep).
sub rules {
  my $self = shift;

  # Headings (h1-h6): every level uses the same rule shape, and each tag
  # receives its own hashref
  my %heading_rules = map {
    ( "h$_" => { start => \&_hr_start, end => \&_hr_end, block => 1, trim => 'both', line_format => 'single' } )
  } 1 .. 6;

  my %rules = (
    hr => { replace => "\n----\n" },
    br => { preserve => 1, empty => 1, attributes => [ qw/id class title style clear/ ] },

    p      => { block => 1, trim => 'both', line_format => 'single' },
    em     => { start => "''", end => "''", line_format => 'single' },
    strong => { start => "'''", end => "'''", line_format => 'single' },
    i      => { alias => 'em' },
    b      => { alias => 'strong' },

    # Lines of preformatted text carry a placeholder prefix that
    # postprocess_output later rewrites
    pre => { line_prefix => $pre_prefix, block => 1 },

    table   => { start => \&_table_start, end => "|}", block => 1, line_format => 'blocks' },
    tr      => { start => \&_tr_start },
    td      => { start => \&_td_start, end => "\n", trim => 'both', line_format => 'blocks' },
    th      => { start => \&_td_start, end => "\n", trim => 'both', line_format => 'single' },
    caption => { start => \&_caption_start, end => "\n", line_format => 'single' },

    img => { replace => \&_image },
    a   => { replace => \&_link },

    ul => { line_format => 'multi', block => 1 },
    ol => { alias => 'ul' },
    dl => { alias => 'ul' },

    li => { start => \&_li_start, trim => 'leading' },
    dt => { alias => 'li' },
    dd => { alias => 'li' },

    # Preserved elements, from MediaWiki's Sanitizer.php (http://tinyurl.com/dzj6o)
    div        => { preserve => 1, attributes => \@block_attrs },
    span       => { preserve => 1, attributes => \@block_attrs },
    blockquote => { preserve => 1, attributes => [ @common_attrs, qw/ cite / ] },
    del        => { preserve => 1, attributes => [ @common_attrs, qw/ cite datetime / ] },
    ins        => { preserve => 1, attributes => [ @common_attrs, qw/ cite datetime / ] },
    font       => { preserve => 1, attributes => [ @common_attrs, qw/ size color face / ] },

    %heading_rules,
  );

  # Tags that are always kept as HTML, plus those the caller asked to
  # keep through the preserve_* attributes. Entries added here replace
  # any rule of the same name defined above (eg, the 'i' and 'b' aliases).
  my @preserved = (
    qw/ center cite code var sup sub tt big small strike s u ruby rb rt rp /,
    ( $self->preserve_italic ? 'i'      : () ),
    ( $self->preserve_bold   ? 'b'      : () ),
    ( $self->preserve_nowiki ? 'nowiki' : () ),
  );

  for my $tag ( @preserved ) {
    $rules{$tag} = { preserve => 1, attributes => \@common_attrs };
  }

  return \%rules;
}
# Declares the attributes accepted by this dialect together with their
# default values. Returns a fresh hashref of the form
# { attribute_name => { default => ... }, ... } on every call.
sub attributes {
  my %attribute_spec = (
    preserve_italic    => { default => 0 },
    preserve_bold      => { default => 0 },
    strip_tags         => { default => [ qw/ head style script ~comment title meta link object / ] },
    pad_headings       => { default => 0 },
    preserve_templates => { default => 0 },
    preserve_nowiki    => { default => 0 },

    # see bug #28402
    # xxx passthrough_naked_tags => { default => [ qw/ tbody thead font / ] },
    passthrough_naked_tags => { default => [ qw/ tbody thead font span / ] },
  );

  return \%attribute_spec;
}
# Start handler for heading elements (h1-h6): returns one '=' per heading
# level, followed by a space when the pad_headings attribute is enabled.
sub _hr_start {
  my( $wc, $node, $subrules ) = @_;

  # The heading level is whatever digits appear in the tag name
  my $level = $node->tag;
  $level =~ s/\D//g;

  my $marker = '=' x $level;
  return $marker unless $wc->pad_headings;
  return "$marker ";
}
# End handler for heading elements (h1-h6): returns one '=' per heading
# level, preceded by a space when the pad_headings attribute is enabled.
sub _hr_end {
  my( $wc, $node, $subrules ) = @_;

  # The heading level is whatever digits appear in the tag name
  my $level = $node->tag;
  $level =~ s/\D//g;

  my $marker = '=' x $level;
  return $marker unless $wc->pad_headings;
  return " $marker";
}
# Post-conversion hook. Receives a reference to the generated wiki text
# and replaces, in place, every occurrence of the $pre_prefix placeholder
# with a single space.
sub postprocess_output {
  my( $self, $outref ) = @_;
  my $placeholder = quotemeta $pre_prefix;
  ${$outref} =~ s/$placeholder/ /g;
}
# Calculates the prefix that will be placed before each list item.
# Handles ordered, unordered, and definition list items.
#
# One bullet character is emitted for each enclosing list found by
# look_up(), with the bullet of the first element returned ending up
# rightmost. The result is a newline, the bullets, and a trailing space.
sub _li_start {
  my( $self, $node, $rules ) = @_;

  my %bullet_for = ( ul => '*', ol => '#', dl => ':' );

  my @bullets;
  foreach my $list ( $node->look_up( _tag => qr/ul|ol|dl/ ) ) {
    my $list_tag = $list->tag;
    my $bullet = exists $bullet_for{$list_tag} ? $bullet_for{$list_tag} : '';

    # A definition term uses ';' rather than ':' for its 'dl' ancestors
    $bullet = ';' if $list_tag eq 'dl' and $node->tag eq 'dt';

    unshift @bullets, $bullet;
  }

  return "\n" . join( '', @bullets ) . ' ';
}
# Replacement handler for anchor elements. Produces an internal
# [[wiki link]] when get_wiki_page() yields a page title for the href,
# and an external link (or the bare URL) otherwise.
sub _link {
  my( $self, $node, $rules ) = @_;

  my $url = $node->attr('href');
  $url = '' unless defined $url;
  my $text = $self->get_elem_contents($node);

  # Handle internal links
  my $title = $self->get_wiki_page( $url );
  if( $title ) {
    $title =~ tr/_/ /;

    # no difference between link text and page title
    if( $text eq $title ) { return "[[$title]]" }

    # differ by 1st char. capitalization
    if( $text eq lcfirst $title ) { return "[[$text]]" }

    # completely different
    return "[[$title|$text]]";
  }

  # Treat them as external links
  return $url eq $text ? $url : "[$url $text]";
}
# Replacement handler for img elements. Returns [[Image:...]] markup
# built from the basename of the 'src' path, optionally followed by a
# pixel width and the alternate text, or '' if the element has no 'src'.
#
# The width is emitted whenever the 'width' attribute is a positive pixel
# count (with or without a trailing "px"), whether or not alt text is
# present. Other widths (eg, percentages) are dropped, since appending
# "px" to them would not produce a valid size option.
sub _image {
  my( $self, $node, $rules ) = @_;

  my $src = $node->attr('src');
  return '' unless $src;

  my $img = basename( URI->new($src)->path );

  # Compare against undef/empty rather than truth so that alt="0" is kept
  my $alt = $node->attr('alt');
  $alt = '' unless defined $alt;

  my $size  = '';
  my $width = $node->attr('width');
  if( defined $width and $width =~ /\A\s*(\d+)\s*(?:px)?\s*\z/i ) {
    $size = "${1}px" if $1 > 0;
  }

  my @parts = ( "Image:$img" );
  push @parts, $size if length $size;
  push @parts, $alt  if length $alt;

  return '[[' . join( '|', @parts ) . ']]';
}
# Start handler for table elements. Returns the opening "{|" line,
# followed by whatever attribute string get_attr_str() produces for the
# permitted table attributes, and a newline.
sub _table_start {
  my( $self, $node, $rules ) = @_;

  # Each attribute name is listed exactly once ('frame' and 'rules' used
  # to appear twice). NOTE(review): get_attr_str presumably emits one
  # name="value" pair per requested name, in which case the duplicates
  # would have been repeated in the output -- confirm against
  # HTML::WikiConverter.
  my @table_attrs = (
    @common_attrs,
    qw/ summary width border frame rules cellspacing cellpadding align bgcolor /
  );

  my $attrs = $self->get_attr_str( $node, @table_attrs );

  my $prefix = '{|';
  $prefix .= ' '.$attrs if $attrs;

  return $prefix."\n";
}
# Start handler for table rows. Returns the "|-" row separator line,
# including the row's attribute string when there is one. A row that has
# neither a left sibling nor attributes produces no separator at all.
sub _tr_start {
  my( $self, $node, $rules ) = @_;

  my $attrs = $self->get_attr_str( $node, @common_attrs, 'bgcolor', @tablealign_attrs );

  return '' if !$node->left and !$attrs;
  return $attrs ? "|- $attrs\n" : "|-\n";
}
# List of tags (and pseudo-tags, in the case of '~text') that are
# considered phrasal elements. Any table cells that contain only these
# elements will be placed on a single line.
my @td_phrasals = qw/ i em b strong u tt code span font sup sub br ~text s strike del ins /;
my %td_phrasals = map { $_ => 1 } @td_phrasals;

# Start handler shared by td and th elements. Returns the cell marker
# ('!' for th, '|' otherwise), the cell's attribute string if any, and
# then either a space or a newline depending on the cell's content.
sub _td_start {
  my( $self, $node, $rules ) = @_;

  my $prefix = $node->tag eq 'th' ? '!' : '|';

  my $attrs = $self->get_attr_str( $node, @common_attrs, @tablecell_attrs, @tablealign_attrs );
  $prefix .= " $attrs |" if $attrs;

  # If there are any non-text elements inside the cell, then the
  # cell's content should start on its own line
  my $non_phrasal_count = grep { !$td_phrasals{ $_->tag } } $node->content_list;

  return $prefix . ( $non_phrasal_count ? "\n" : ' ' );
}
# Start handler for table captions. Returns "|+ " on its own, or
# "|+ <attributes> |" when the caption carries any of the permitted
# attributes.
sub _caption_start {
  my( $self, $node, $rules ) = @_;

  my $attrs = $self->get_attr_str( $node, @common_attrs, 'align' );

  return $attrs ? "|+ $attrs |" : '|+ ';
}
# Per-node preprocessing hook: runs strip_aname on anchors, _strip_extra
# on every node, and _nowiki_text on '~text' nodes.
sub preprocess_node {
  my( $self, $node ) = @_;

  my $tag = $node->tag;
  $tag = '' unless defined $tag;

  if( $tag eq 'a' ) {
    $self->strip_aname($node);
  }

  $self->_strip_extra($node);

  # # XXX font-to-span convers
  # $node->tag('span') if $tag eq 'font';

  $self->_nowiki_text($node) if $tag eq '~text';
}
# Regex fragments interpolated into the substitution in _nowiki_text that
# looks for bracketed external links inside text nodes.
# Alternation of URL schemes that may start such a link
my $URL_PROTOCOLS = 'http|https|ftp|irc|gopher|news|mailto';
# Character class for the URL part: excludes ], <, >, ", control
# characters and space (\x00-\x20), and DEL (\x7F)
my $EXT_LINK_URL_CLASS = '[^]<>"\\x00-\\x20\\x7F]';
# Character class for the link-text part: excludes ], control characters
# (\x00-\x1F), and DEL (\x7F)
my $EXT_LINK_TEXT_CLASS = '[^\]\\x00-\\x1F\\x7F]';
# Text nodes matching one or more of these patterns will be enveloped
# in <nowiki> and </nowiki>
#
# Returns a hashref mapping a pattern name to { pattern => qr/.../,
# location => 'start_of_line' | 'anywhere' }. The 'template' entry is
# omitted when the preserve_templates attribute is enabled.
sub _wikitext_patterns {
  my $self = shift;

  # the caret in "qr/^/" seems redundant with "start_of_line" but both
  # are necessary
  my %wikitext_patterns = (
    misc     => { pattern => qr/^(?:\*|\#|\;|\:|\=|\!|\|)/m, location => 'start_of_line' },
    italic   => { pattern => qr/''/,                          location => 'anywhere' },
    rule     => { pattern => qr/^----/m,                      location => 'start_of_line' },
    table    => { pattern => qr/^\{\|/m,                      location => 'start_of_line' },
    link     => { pattern => qr/\[\[/m,                       location => 'anywhere' },

    # Literal braces are escaped: an unescaped "{" inside a pattern is
    # deprecated in newer perls. The strings matched are unchanged.
    template => { pattern => qr/\{\{/m,                       location => 'anywhere' },
  );

  delete $wikitext_patterns{template} if $self->preserve_templates;

  return \%wikitext_patterns;
}
# Protects text that would otherwise be interpreted as wiki markup.
#
# If the node's text matches one of the 'anywhere' patterns from
# _wikitext_patterns, or matches a 'start_of_line' pattern while being
# the leftmost text node of its nearest block-level ancestor, the whole
# text is wrapped in <nowiki>...</nowiki>. Otherwise only bracketed
# external links (eg, "[http://example.org]") inside the text are
# wrapped. The result is stored back into the node's 'text' attribute.
sub _nowiki_text {
  my( $self, $node ) = @_;

  my $text = defined $node->attr('text') ? $node->attr('text') : '';
  return unless $text;

  my $wikitext_patterns = $self->_wikitext_patterns;

  my @anywhere_patterns =
    map { $_->{pattern} } grep { $_->{location} eq 'anywhere' } values %$wikitext_patterns;
  my $found_nowiki_text = $self->_match( $text, \@anywhere_patterns );

  if( !$found_nowiki_text ) {
    my @sol_patterns =
      map { $_->{pattern} } grep { $_->{location} eq 'start_of_line' } values %$wikitext_patterns;

    # find closest parent that is a block-level node
    my $nearest_parent_block = $self->elem_search_lineage( $node, { block => 1 } );

    if( $nearest_parent_block ) {
      my $leftmostish_text_node = $self->_get_leftmostish_text_node( $nearest_parent_block );

      # Only the first text node of the block can begin a line, so
      # start_of_line fixes are applied to that node alone
      if( $leftmostish_text_node and $node == $leftmostish_text_node ) {
        $found_nowiki_text = $self->_match( $text, \@sol_patterns );
      }
    }
  }

  if( $found_nowiki_text ) {
    $text = "<nowiki>$text</nowiki>";
  } else {
    # Escape anything that merely looks like a bracketed external link
    $text =~ s~(\[\b(?:$URL_PROTOCOLS):$EXT_LINK_URL_CLASS+ *$EXT_LINK_TEXT_CLASS*?\])~<nowiki>$1</nowiki>~go;
  }

  $node->attr( text => $text );
}
# Follows the chain of first children starting at $node and returns the
# first node whose tag is '~text'. Returns nothing if the chain ends
# before a text node is found.
sub _get_leftmostish_text_node {
  my( $self, $node ) = @_;

  while( $node ) {
    return $node if $node->tag eq '~text';
    $node = ( $node->content_list )[0];
  }

  return;
}
# Returns 1 if $text matches at least one of the regexes in the
# $patterns arrayref, and 0 otherwise.
sub _match {
  my( $self, $text, $patterns ) = @_;

  foreach my $pattern ( @$patterns ) {
    return 1 if $text =~ $pattern;
  }

  return 0;
}
# Attribute name => pattern. A node whose attribute value matches the
# pattern is removed from the tree by _strip_extra.
my %extra = (
  id => qr/catlinks/,
  class => qr/urlexpansion|printfooter|editsection/
);

# Delete elements whose 'id' or 'class' matches %extra (eg, an element
# with class="urlexpansion") et al
sub _strip_extra {
  my( $self, $node ) = @_;

  for my $att_name ( keys %extra ) {
    my $att_value = $node->attr($att_name);
    next unless defined $att_value and $att_value =~ $extra{$att_name};

    # One match is enough: unlink the node from its parent and destroy it
    $node->detach();
    $node->delete();
    return;
  }
}
=head1 AUTHOR
David J. Iberri, C<< >>
=head1 BUGS
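Please report any bugs or feature requests to
C<bug-html-wikiconverter-mediawiki at rt.cpan.org>, or through the web
interface at
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-WikiConverter-MediaWiki>.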
I will be notified, and then you'll automatically be notified of
progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc HTML::WikiConverter::MediaWiki
You can also look for information at:
=over 4
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/HTML-WikiConverter-MediaWiki>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/HTML-WikiConverter-MediaWiki>
=item * RT: CPAN's request tracker
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=HTML-WikiConverter-MediaWiki>
=item * Search CPAN
L<http://search.cpan.org/dist/HTML-WikiConverter-MediaWiki>
=back
=head1 COPYRIGHT & LICENSE
Copyright 2006 David J. Iberri, all rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
1;
HTML-WikiConverter-MediaWiki-0.59/t/ 0000755 0000000 0000000 00000000000 11472547624 0017122 5 ustar 00root root 0000000 0000000 HTML-WikiConverter-MediaWiki-0.59/t/00-load.t 0000644 0000000 0000000 00000000310 11472547624 0020435 0 ustar 00root root 0000000 0000000 #!perl -T
# Smoke test: the module must load, and the versions under test are
# reported for diagnostic purposes.
use Test::More tests => 1;
BEGIN {
use_ok( 'HTML::WikiConverter::MediaWiki' );
}
# Report the module version, the perl version ($]) and the perl binary ($^X)
diag( "Testing HTML::WikiConverter::MediaWiki $HTML::WikiConverter::MediaWiki::VERSION, Perl $], $^X" );
HTML-WikiConverter-MediaWiki-0.59/t/01-mediawiki.t 0000644 0000000 0000000 00000032744 11472547624 0021502 0 ustar 00root root 0000000 0000000 local $/;
# Load the shared test driver and feed it the test cases stored after
# __DATA__ in this file.
require 't/runtests.pl';

# The 'data' value is read from the DATA handle. The record separator was
# undefined above (local $/), so a single read returns the whole section;
# scalar() guarantees exactly one value even when DATA is empty, which
# keeps the key/value pairs that follow aligned.
runtests( data => scalar <DATA>, dialect => 'MediaWiki', wiki_uri => 'http://www.test.com/wiki/' );
close DATA;
__DATA__
external link
__H__
[http://example.com]
__W__
[http://example.com [http://example.com]]
__NEXT__
nowiki template
__H__
mark stubs with {{stub}}
__W__
mark stubs with {{stub}}
__NEXT__
nowiki quoted
__H__
what happens to 'quoted text'?
__W__
what happens to 'quoted text'?
__NEXT__
nowiki doubly quoted
__H__
how about ''doubly quoted''?
__W__
how about ''doubly quoted''?
__NEXT__
nowiki triply quoted
__H__
and '''triply quoted'''?
__W__
and '''triply quoted'''?
__NEXT__
nowiki hr
__H__
----
__W__
----
__NEXT__
nowiki ul
__H__
* ul
__W__
* ul
__NEXT__
nowiki ol
__H__
# ol
__W__
# ol
__NEXT__
nowiki def
__H__
; def
__W__
; def
__NEXT__
nowiki indent
__H__
: indent
__W__
: indent
__NEXT__
nowiki internal links
__H__
an [[internal]] link
__W__
an [[internal]] link
__NEXT__
nowiki table markup
__H__
{|
| table
|}
__W__
{| | table |}
__NEXT__
nowiki ext link
__H__
[http://example.com]
__W__
[http://example.com]
__NEXT__
(bug #46453) triggering too often
__H__
x:bla
__W__
''x'':bla
__NEXT__
do not add a tag only if offending character(s) occur at the beginning of text node
__H__
text *
text #
text ;
text :
text =
text !
text |
text ----
text {|
__W__
text '''*'''
text '''#'''
text ''';'''
text ''':'''
text '''='''
text '''!'''
text '''|'''
text '''----'''
text '''{|'''
__NEXT__
tr attributes
__H__
__W__
{|
|- align="left" valign="top"
| ok
|}
__NEXT__
preserve cite
__H__
text
__W__
text
__NEXT__
preserve var
__H__
text
__W__
text
__NEXT__
preserve blockquote
__H__
text
__W__
text
__NEXT__
preserve ruby
__H__
text
__W__
text
__NEXT__
preserve rb
__H__
text
__W__
text
__NEXT__
preserve rt
__H__
__W__
__NEXT__
preserve rp
__H__
__W__
__NEXT__
preserve div
__H__
ok
__W__
ok
__NEXT__
empty line break
__H__
__W__
__NEXT__
br attribs
__H__
ok
__W__
ok
__NEXT__
wrap in html
__H__
GOOGLE
NewLine
__W__
[http://google.com GOOGLE] NewLine
__NEXT__
bold
__H__
bold
__W__
'''bold'''
__NEXT__
italics
__H__
italics
__W__
''italics''
__NEXT__
bold and italics
__H__
bold and italics
__W__
'''bold''' and ''italics''
__NEXT__
bold-italics nested
__H__
bold-italics nested
__W__
'''''bold-italics'' nested'''
__NEXT__
strong
__H__
strong
__W__
'''strong'''
__NEXT__
emphasized
__H__
emphasized
__W__
''emphasized''
__NEXT__
underlined
__H__
underlined
__W__
underlined
__NEXT__
strikethrough
__H__
strike
__W__
strike
__NEXT__
deleted
__H__
deleted text
__W__
deleted text
__NEXT__
inserted
__H__
inserted
__W__
inserted
__NEXT__
span tags removed if naked (ie, have no attribs)
__H__
text here
__W__
text here
__NEXT__
strip aname
__H__
__W__
__NEXT__
one-line phrasals
__H__
phrasals
in one line
__W__
''phrasals in one line''
__NEXT__
paragraph blocking
__H__
p1 p2
__W__
p1
p2
__NEXT__
lists
__H__
__W__
* 1
* 2
__NEXT__
nested lists
__H__
__W__
* 1
** 1a
** 1b
* 2
__NEXT__
nested lists (different types)
__H__
__W__
* 1
** a
**# i
** b
* 2
*: indented
__NEXT__
hr
__H__
__W__
----
__NEXT__
br
__H__
stuff stuff two
__W__
stuff stuff two
__NEXT__
div
__H__
thing
__W__
thing
__NEXT__
div w/ attrs
__H__
thing
__W__
thing
__NEXT__
sub
__H__
H2O
__W__
H2O
__NEXT__
sup
__H__
x2
__W__
x2
__NEXT__
center
__H__
centered text
__W__
centered text
__NEXT__
small
__H__
small text
__W__
small text
__NEXT__
code
__H__
$name = 'stan';
__W__
$name = 'stan';
__NEXT__
tt
__H__
tt text
__W__
tt text
__NEXT__
font-to-span conversion ::TODO("HTML::WikiConverter::Normalizer not doing font-to-span conversion yet")
__H__
font
__W__
font
__NEXT__
font
__H__
font
__W__
font
__NEXT__
pre
__H__
this
is
preformatted
text
__W__
this
is
preformatted
text
__NEXT__
indent
__H__
- indented text
__W__
: indented text
__NEXT__
nested indent
__H__
- stuff
- double-indented
__W__
: stuff
:: double-indented
__NEXT__
h1
__H__
h1
__W__
=h1=
__NEXT__
h2
__H__
h2
__W__
==h2==
__NEXT__
h3
__H__
h3
__W__
===h3===
__NEXT__
h4
__H__
h4
__W__
====h4====
__NEXT__
h5
__H__
h5
__W__
=====h5=====
__NEXT__
h6
__H__
h6
__W__
======h6======
__NEXT__
img
__H__
__W__
[[Image:thing.gif]]
__NEXT__
table
__H__
Stuff
Name | David |
Age | 24 |
Height | 6' |
|
__W__
{|
|+ Stuff
|-
! Name
| David
|-
! Age
| 24
|-
! Height
| 6'
|-
|
{|
| Nested
| tables
|-
| are
| fun
|}
|}
__NEXT__
table w/ attrs
__H__
Stuff
Name | Foo |
Age | 24 |
Height | 6' |
__W__
{| border="1" cellpadding="3" bgcolor="#ffffff"
|+ Stuff
|- id="first" class="unselected"
! id="thing" bgcolor="black" | Name
| Foo
|- class="selected"
! Age
| 24
|- class="unselected"
! Height
| 6'
|}
__NEXT__
table w/ blocks
__H__
__W__
{|
| align="center" |
Paragraph 1
Paragraph 2
|}
__NEXT__
strip empty aname
__H__
some text
__W__
some text
__NEXT__
wiki link (text == title)
__H__
Some wiki page
__W__
[[Some wiki page]]
__NEXT__
wiki link (text case != title case)
__H__
another page
__W__
[[another page]]
__NEXT__
wiki link (text != title)
__H__
some text
__W__
[[Another page|some text]]
__NEXT__
external links
__H__
thing
__W__
[http://www.test.com thing]
__NEXT__
external links (rel2abs)
__H__
thing
__W__
[http://www.test.com/thing.html thing]
__NEXT__
strip urlexpansion
__H__
Google (http://www.google.com)
__W__
[http://www.google.com Google]
__NEXT__
strip printfooter
__H__
__W__
__NEXT__
strip catlinks
__H__
__W__
__NEXT__
strip editsection
__H__
This is great
__W__
This is
great
__NEXT__
escape bracketed urls
__H__
This is a text node with what looks like an ext. link [http://example.org].
__W__
This is a text node with what looks like an ext. link [http://example.org].
__NEXT__
line with vertical bar
__H__
| a line with a vertical bar
__W__
| a line with a vertical bar
__NEXT__
line that starts with a bang
__H__
! a line that starts with a bang
__W__
! a line that starts with a bang
__NEXT__
line that looks like a section
__H__
= a line that looks like a section
__W__
= a line that looks like a section
__NEXT__
pre-many (bug #14527)
__H__
preformatted text
with spaces
should produce only one
pre-block
__W__
preformatted text
with spaces
should produce only one
pre-block
__NEXT__
pre following pre
__H__
preformatted text
more preformatted text
once again
__W__
preformatted text
more preformatted text
once again
__NEXT__
preserve ::preserve_bold(1)
__H__
hello
__W__
hello
__NEXT__
hr under td
__H__
__W__
{|
|
----
|}
__NEXT__
img alt
__H__
__W__
[[Image:thing.gif|Just a test]]
__NEXT__
no preserve templates ::preserve_templates(0)
__H__
{{template}}
__W__
{{template}}
__NEXT__
preserve templates ::preserve_templates(1)
{{template}}
__W__
{{template}}
__NEXT__
no preserve nowiki ::preserve_nowiki(0)
__H__
hey
__W__
hey
__NEXT__
preserve nowiki ::preserve_nowiki(1)
__H__
hey
__W__
hey
__NEXT__
preserve image width
__H__
__W__
[[Image:thing.jpg|200px|The Thing]]
__NEXT__
tbody and thead fixes (bug #28402)
__H__
heading col 1 |
heading col 2 |
heading last col |
data first col first row |
data c2 r1 |
data c3 r1 |
data c1 r2 |
data c2 r2 |
data c3 r2 |
data c1 r3 |
data c2 r3 |
data c3 r3 |
__W__
{| border="1"
|-
! heading col 1
! heading col 2
! heading last col
|-
| data first col first row
| data c2 r1
| data c3 r1
|-
| data c1 r2
| data c2 r2
| data c3 r2
|-
| data c1 r3
| data c2 r3
| data c3 r3
|}
__NEXT__
don't pad headings ::pad_headings(0)
__H__
Heading
__W__
==Heading==
__NEXT__
table with zeros
__H__
__W__
{|
| 0
|-
| 1
|-
| 0
|-
| 1
|}
__NEXT__
(bug #40845) internal links, without wiki_uri
__H__
Class Browser
__W__
[http://www.test.com/class_browser.html Class Browser]
__NEXT__
(bug #40845) internal links, with wiki_uri=base_uri ::wiki_uri('http://www.test.com/')
__H__
Class Browser
__W__
[[class browser.html|Class Browser]]
__NEXT__
(bug #40845) broken links with anchors, without wiki_uri
__H__
adding
__W__
[http://www.test.com#Adding adding]
__NEXT__
(bug #40845) links with anchors, with wiki_uri ::wiki_uri('http://www.test.com/') ::TODO('wiki_uri not working with an ending slash')
__H__
adding
__W__
[[#Adding|adding]]
__NEXT__
(bug #24745) font/span weirdness ::TODO("HTML::WikiConverter::Normalizer doesn't handle this yet");
__H__
The Test Header
__W__
The Test Header
__NEXT__
(bug #29342) Tag attributes with 0 ::TODO("this is actually an H::WC-specific bug")
__H__
__W__
{| border="1" cellpadding="3" cellspacing="0"
| Hello
| World
|}
HTML-WikiConverter-MediaWiki-0.59/t/boilerplate.t 0000644 0000000 0000000 00000002343 11472547624 0021613 0 ustar 00root root 0000000 0000000 #!perl -T
use strict;
use warnings;
use Test::More tests => 3;
# Passes if no line of $filename matches any of the given regexes, and
# fails otherwise, reporting the line numbers of every match.
#
# Arguments: the file to scan, followed by description => regex pairs.
# Dies if the file cannot be opened.
sub not_in_file_ok {
    my ($filename, %regex) = @_;

    open my $fh, "<", $filename
        or die "couldn't open $filename for reading: $!";

    # description => list of line numbers on which its regex matched
    my %violated;

    while (defined(my $line = <$fh>)) {
        for my $desc (keys %regex) {
            next unless $line =~ $regex{$desc};
            push @{ $violated{$desc} }, $.;
        }
    }

    if (!%violated) {
        pass("$filename contains no boilerplate text");
    } else {
        fail("$filename contains boilerplate text");
        for my $desc (keys %violated) {
            diag "$desc appears on lines @{$violated{$desc}}";
        }
    }
}
not_in_file_ok(README =>
"The README is used..." => qr/The README is used/,
"'version information here'" => qr/to provide version information/,
);
not_in_file_ok(Changes =>
"placeholder date/time" => qr(Date/time)
);
# Checks a module file for leftover template text by handing a fixed set
# of description => regex pairs to not_in_file_ok.
sub module_boilerplate_ok {
    my ($module) = @_;

    my %boilerplate = (
        'the great new $MODULENAME' => qr/ - The great new /,
        'boilerplate description'   => qr/Quick summary of what the module/,
        'stub function definition'  => qr/function[12]/,
    );

    not_in_file_ok($module, %boilerplate);
}
module_boilerplate_ok('lib/HTML/WikiConverter/MediaWiki.pm');
HTML-WikiConverter-MediaWiki-0.59/t/mediawiki.preserve.t 0000644 0000000 0000000 00000002000 11472547624 0023074 0 ustar 00root root 0000000 0000000 local $/;
# Load the shared test driver and feed it the test cases stored after
# __DATA__ in this file, with bold and italic preservation switched on.
require 't/runtests.pl';

# The 'data' value is read from the DATA handle. The record separator was
# undefined above (local $/), so a single read returns the whole section;
# scalar() guarantees exactly one value even when DATA is empty, which
# keeps the key/value pairs that follow aligned.
runtests( data => scalar <DATA>, dialect => 'MediaWiki', minimal => 1, preserve_italic => 1, preserve_bold => 1 );
close DATA;
__DATA__
preserve bold
__H__
bold
__W__
bold
__NEXT__
preserve bold w/ attrs
__H__
this
__W__
this
__NEXT__
preserve bold w/ bad attrs
__H__
clickme
__W__
clickme
__NEXT__
convert strong
__H__
strong
__W__
'''strong'''
__NEXT__
both strong/b
__H__
__W__
* bold
* '''strong'''
__NEXT__
preserve italic
__H__
italic
__W__
italic
__NEXT__
preserve italic w/ attrs
__H__
italic
__W__
italic
__NEXT__
preserve italic w/ bad attrs
__H__
clickme
__W__
clickme
__NEXT__
convert em
__H__
em
__W__
''em''
__NEXT__
both em/i
__H__
__W__
* italic
* ''em''
HTML-WikiConverter-MediaWiki-0.59/t/pod-coverage.t 0000644 0000000 0000000 00000001055 11472547624 0021663 0 ustar 00root root 0000000 0000000 #!perl -T
# POD coverage test; skipped entirely when Test::Pod::Coverage 1.04 or
# later cannot be loaded.
use Test::More;
# String eval so that a missing module sets $@ instead of aborting the script
eval "use Test::Pod::Coverage 1.04";
plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@;
# Methods matching the regex below are treated as private, so they are
# not required to have POD in this distribution.
all_pod_coverage_ok( { also_private => [
# These methods are documented in HTML::WikiConverter::Dialects
qr/
get_elem_contents
|get_wiki_page
|get_attr_str
|elem_within_block
|is_camel_case
|rule
|rules
|attribute
|attributes
|preprocess_tree
|preprocess_node
|postprocess_output
|caption2para
|strip_aname
|base_url
|wiki_url
/x
] } );
HTML-WikiConverter-MediaWiki-0.59/t/pod.t 0000644 0000000 0000000 00000000214 11472547624 0020066 0 ustar 00root root 0000000 0000000 #!perl -T
# POD syntax test; skipped entirely when Test::Pod 1.14 or later cannot
# be loaded.
use Test::More;
# String eval so that a missing module sets $@ instead of aborting the script
eval "use Test::Pod 1.14";
plan skip_all => "Test::Pod 1.14 required for testing POD" if $@;
all_pod_files_ok();
HTML-WikiConverter-MediaWiki-0.59/t/runtests.pl 0000644 0000000 0000000 00000006625 11472547624 0021357 0 ustar 00root root 0000000 0000000 #!/usr/bin/perl
use warnings;
use strict;
use Test::More;
use File::Spec;
use HTML::Entities;
use HTML::WikiConverter;
*e = \&encode_entities;
my $more_tests = < comment
__W__
A comment
__NEXT__
strip head
__H__
fun stuff
Crazy stuff here
__W__
Crazy stuff here
__NEXT__
strip scripts
__H__
benevolent text
__W__
benevolent text
END_TESTS
# Runs a batch of HTML-to-wiki conversion tests.
#
# Named arguments:
#   data    - test cases separated by "__NEXT__\n". Each case is a name
#             line followed by the HTML input (optionally introduced by
#             "__H__\n") and, after "__W__\n", the expected wiki markup.
#   minimal - when true, the generated entity tests and $more_tests are
#             not appended to the data.
# All remaining arguments are passed to the HTML::WikiConverter
# constructor; wrap_in_html is forced on and base_uri gets a default.
#
# A test name may carry "::method(args)" suffixes. "::TODO('reason')"
# marks the test as TODO; any other suffix is called as a method on the
# converter before the test runs. The same converter is used for every
# test, so such calls remain in effect for the tests that follow.
sub runtests {
  my %arg = @_;

  $arg{wrap_in_html} = 1;
  $arg{base_uri} ||= 'http://www.test.com';

  my $minimal = $arg{minimal} || 0;
  my $data = $arg{data} || '';
  $data .= entity_tests() . $more_tests unless $minimal;

  my @tests = split /__NEXT__\n/, $data;
  my $numtests = @tests;
  #$numtests += 1 unless $minimal; # file test
  plan tests => $numtests;

  # Delete unrecognized HTML::WikiConverter options
  delete $arg{$_} for qw/ data minimal /;

  my $wc = HTML::WikiConverter->new(%arg);

  foreach my $test ( @tests ) {
    # The first line is the test name. Take $1 only when the match
    # succeeded so that a capture left over from an earlier match is
    # never reused as the name.
    my $name = $test =~ s/^(.*?)\n// ? $1 : '';

    my( $html, $wiki ) = split /__W__\n/, $test;
    $html =~ s/__H__\n//;

    # Strip "::keyword(...)" suffixes from the name, acting on each one
    my( $todo, $todo_reason );
    $name =~ s{\s*\:\:(\w+\([^\)]*?\))}{
      my $keyword = $1;
      if( $keyword =~ /TODO\((\"|\')(.*?)\1/ ) {
        $todo = 1;
        $todo_reason = $2;
      } else {
        # The call comes from the test file itself, hence the string eval
        my $method_call = $keyword;
        eval "\$wc->$method_call;";
        die "Failed test call ($name): $@" if $@;
      }
      '';
    }ge;

    for( $html, $wiki ) { s/^\n+//; s/\n+$// }

    if( $todo ) {
      TODO: {
        local $TODO = $todo_reason;
        is( $wc->html2wiki($html), $wiki, $name );
      }
    } else {
      is( $wc->html2wiki($html), $wiki, $name );
    }
  }

  #file_test($wc) unless $minimal;
}
# Generates test cases, in the format consumed by runtests, that check
# how the characters '<', '>' and '&' are handled when they appear
# literally, entity-encoded, and doubly entity-encoded in the HTML.
# Returns the cases as one string, each introduced by "__NEXT__\n".
sub entity_tests {
  my $tmpl = "__NEXT__\n%s\n__H__\n%s\n__W__\n%s\n"; # test-name, html-input, expected-wiki-output

  my $data = '';
  foreach my $char ( '<', '>', '&' ) {
    my $encoded       = e($char);
    my $twice_encoded = e($encoded);

    # Name the test after the entity, without its '&' and ';'
    ( my $charname = $encoded ) =~ s/[&;]//g;

    # Each sprintf call is parenthesized: as a list operator, a bare
    # sprintf would otherwise absorb the following concatenation into
    # its own last argument.
    $data .= sprintf( $tmpl, "literal ($charname)", $char, $encoded );
    $data .= sprintf( $tmpl, "encode ($charname)", $encoded, $encoded );
    $data .= sprintf( $tmpl, "meta ($charname)", $twice_encoded, $twice_encoded );
  }

  return $data;
}
# Returns the entire contents of the file at $path as a single string.
# Dies if the file cannot be opened.
sub _slurp {
  my $path = shift;

  # Three-argument open with a lexical handle: the path is never
  # interpreted as an open mode and no global handle is left behind
  open my $fh, '<', $path or die "couldn't open $path: $!";

  # Undefine the record separator for this scope so that one read
  # returns the whole file
  local $/;
  my $c = <$fh>;
  close $fh;

  return $c;
}
# Converts t/complete.html with the given converter and compares the
# result with t/complete.<dialect> (dialect name lowercased). The test is
# skipped when either file is missing.
sub file_test {
  my $wc = shift;

  my $suffix  = lc $wc->dialect;
  my $infile  = File::Spec->catfile( 't', 'complete.html' );
  my $outfile = File::Spec->catfile( 't', "complete.$suffix" );

  SKIP: {
    skip "Couldn't find $infile (ignore this)", 1 unless -e $infile;
    skip "Couldn't find $outfile (ignore this)", 1 unless -e $outfile;

    my( $got, $expect ) = (
      $wc->html2wiki( file => $infile, slurp => 1 ),
      _slurp($outfile),
    );

    # Leading and trailing newlines are not significant
    foreach my $str ( $got, $expect ) {
      $str =~ s/^\n+//;
      $str =~ s/\n+$//;
    }

    is( $got, $expect, 'read from file' );
  };
}
1;
|