Sisyphus repository
Last update: 1 october 2023 | SRPMs: 18631 | Visits: 37524400
en ru br
ALT Linux repos
S:1.06a-alt4
5.0: 1.06a-alt2

Group :: Development/Perl
RPM: perl-Text-DelimMatch

 Main   Changelog   Spec   Patches   Sources   Download   Gear   Bugs and FR  Repocop 

pax_global_header00006660000000000000000000000064076624042620014522gustar00rootroot0000000000000052 comment=3f5bc78dec5aba0b28642f0172a52f7d427d0a23
DelimMatch-1.06a/000075500000000000000000000000000766240426200136165ustar00rootroot00000000000000DelimMatch-1.06a/DelimMatch.pm000064400000000000000000000532320766240426200161700ustar00rootroot00000000000000package Text::DelimMatch;

use strict;
use vars qw($VERSION @ISA @EXPORT $case_sensitive);

require 5.000;
require Exporter;
require AutoLoader;

@ISA = qw(Exporter AutoLoader);
@EXPORT = qw();
$VERSION = '1.06';

sub new {
my $type = shift;
my $start = shift;
my $end = shift || $start;
my $esc = shift;
my $dblesc= shift;
my $class = ref($type) || $type;
my $self = bless {}, $class;
local $_ = "no -w warning in evals now";

eval "/$start/" if defined($start);
eval "/$end/" if !$@ && defined($end);

$self->{'STARTREGEXP'} = $start; # a regexp
$self->{'ENDREGEXP'} = $end; # a regexp
$self->{'QUOTE'} = {}; # a hash of regexp, start => end
$self->{'ESCAPE'} = ""; # a regexp set of chars
$self->{'DBLESCAPE'} = ""; # a regexp set of chars

$self->{'ERROR'} = $@; # false if OK
$self->{'DEBUG'} = 0; # boolean
$self->{'CASESENSE'} = 0; # boolean
$self->{'FORCESLOW'} = 0; # boolean
$self->{'KEEP'} = 1; # boolean
$self->{'RETURNDELIM'} = 1; # boolean

$self->{'BUFFER'} = "";
$self->{'PRE'} = "";
$self->{'MATCH'} = "";
$self->{'POST'} = "";

$self->escape($esc) if $esc;
$self->double_escape($dblesc) if $dblesc;
$self->quote(@_) if @_;

return $self;
}

sub delim {
my $self = shift;
my $start = shift;
my $end = shift || $start;
my $curs = $self->{'STARTREGEXP'};
my $cure = $self->{'ENDREGEXP'};
local $_ = "no -w warning in evals now";

eval "/$start/" if defined($start);
eval "/$end/" if !$@ && defined($end);

$self->{'ERROR'} = $@; # false if OK
$self->{'STARTREGEXP'} = $start;
$self->{'ENDREGEXP'} = $end;

if ($self->{'DEBUG'}) {
print "DELIM : $start, $end";
print ": ", $self->{'ERROR'} if $self->{'ERROR'};
print "\n";
}

return ($curs, $cure);
}

sub quote {
my $self = shift;
my (%oldq) = %{$self->{'QUOTE'}};
local $_ = "no -w warning in evals now";
my ($key, $val);

$key = shift @_;

if (!defined($key)) {
$self->{'QUOTE'} = {};
} else {
while ($key) {
$val = shift @_ || $key;

eval "/$key/" if defined($key);
eval "/$val/" if !$@ && defined($val);
$self->{'ERROR'} = $@ if $@;

if ($self->{'DEBUG'}) {
print "QUOTE : $key = $val";
print ": ", $self->{'ERROR'} if $self->{'ERROR'};
print "\n";
}

$self->{'QUOTE'}->{$key} = $val;
$key = shift @_;
}
}

return %oldq;
}

sub escape {
my $self = shift;
my $esc = shift;
my $curesc = $self->{'ESCAPE'};
local $_ = "no -w warning in evals now";

$esc = '[' . quotemeta($esc) . ']' if defined($esc) && ($esc ne "");

if (defined($esc) && ($esc ne "")) {
eval "/$esc/";
$self->{'ERROR'} = $@ if $@;
}

$self->{'ESCAPE'} = $esc;

if ($self->{'DEBUG'}) {
print "ESCAPE: $esc";
print ": ", $self->{'ERROR'} if $self->{'ERROR'};
print "\n";
}

return $curesc;
}

sub double_escape {
my $self = shift;
my $esc = shift;
my $curesc = $self->{'DBLESCAPE'};
local $_ = "no -w warning in evals now";

$esc = '[' . quotemeta($esc) . ']' if defined($esc) && ($esc ne "");

if (defined($esc) && ($esc ne "")) {
eval "/$esc/";
$self->{'ERROR'} = $@ if $@;
}

$self->{'DBLESCAPE'} = $esc;

if ($self->{'DEBUG'}) {
print "DBLESC: $esc";
print ": ", $self->{'ERROR'} if $self->{'ERROR'};
print "\n";
}

return $curesc;
}

sub case_sensitive {
my $self = shift;
my $setsense = shift;
my $cursense = $self->{'CASESENSE'};

$self->{'CASESENSE'} = $setsense || !defined($setsense);

print "CASE : ", $self->{'CASESENSE'}, "\n"
if $self->{'DEBUG'};

return $cursense;
}

sub slow {
my $self = shift;
my $setslow = shift;
my $curslow = $self->{'FORCESLOW'};

$self->{'FORCESLOW'} = $setslow || !defined($setslow);

print "GOSLOW: ", $self->{'FORCESLOW'}, "\n"
if $self->{'DEBUG'};

return $curslow;
}

sub keep {
my $self = shift;
my $setkeep = shift;
my $curkeep = $self->{'KEEP'};

$self->{'KEEP'} = $setkeep || !defined($setkeep);

print "KEEP : ", $self->{'KEEP'}, "\n"
if $self->{'DEBUG'};

return $curkeep;
}

sub returndelim {
my $self = shift;
my $setrd = shift;
my $currd = $self->{'RETURNDELIM'};

$self->{'RETURNDELIM'} = $setrd || !defined($setrd);

print "RETURNDELIM : ", $self->{'RETURNDELIM'}, "\n"
if $self->{'DEBUG'};

return $currd;
}

sub debug {
my $self = shift;
my $setdebug = shift;
my $curdebug = $self->{'DEBUG'};

$self->{'DEBUG'} = $setdebug || !defined($setdebug);

print "DEBUG : ", $self->{'DEBUG'}, "\n"
if $self->{'DEBUG'};

return $curdebug;
}

sub error {
my $self = shift;
my $seterr = shift;
my $curerr = $self->{'ERROR'};

$self->{'ERROR'} = $seterr if defined($seterr);
return $curerr;
}

sub pre_matched {
my $self = shift;
$self->{'ERROR'} = "pre_matched requires keep" if !$self->{'KEEP'};
return $self->{'PRE'};
}

sub matched {
my $self = shift;
$self->{'ERROR'} = "matched requires keep" if !$self->{'KEEP'};
return $self->{'MATCH'};
}

sub post_matched {
my $self = shift;
$self->{'ERROR'} = "post_matched requires keep" if !$self->{'KEEP'};
return $self->{'POST'};
}

sub dump {
my $self = shift;
my ($key, $val);

print "Dump of Text::DelimMatch:\n";

print "\n\tERROR : ", $self->{'ERROR'}, "\n"
if $self->{'ERROR'};

print "\tStart : ", $self->{'STARTREGEXP'}, "\n";
print "\tEnd : ", $self->{'ENDREGEXP'}, "\n";
print "\tEscape: ", $self->{'ESCAPE'}, "\n";
print "\tDblEsc: ", $self->{'DBLESCAPE'}, "\n";
print "\tDebug : ", $self->{'DEBUG'}, "\n";
print "\tCase : ", $self->{'CASESENSE'}, "\n";
print "\tSlow : ", $self->{'FORCESLOW'}, "\n";
print "\tKeep : ", $self->{'KEEP'}, "\n";
print "\tQuote :\n";
while (($key, $val) = each %{$self->{'QUOTE'}}) {
print "\t\t$key ... $val\n";
}
print "\tBuffer: ", $self->{'BUFFER'}, "\n";
print "\tPrefix: ", $self->{'PRE'}, "\n";
print "\tMatch : ", $self->{'MATCH'}, "\n";
print "\tPost : ", $self->{'POST'}, "\n\n";
}

sub match {
my $self = shift;
my $string = shift;
my $state = 0;
my $start = $self->{'STARTREGEXP'};
my $end = $self->{'ENDREGEXP'};
my %quote = %{$self->{'QUOTE'}};
my $escape = $self->{'ESCAPE'};
my $dblesc = $self->{'DBLESCAPE'};
my $debug = $self->{'DEBUG'};
my ($startq, $endq, $specialq) = ("", "", "");
my ($done) = 0;
my ($depth) = 0;
my (@states) = ();
my ($accum) = "";
my ($regexp, $match, $pre, $matched, $post);
my ($scratch);
local $_ = "no -w warning in evals now";

return if $self->{'ERROR'};

if (defined($string)) {
$self->{'BUFFER'} = $string;
} else {
# use post of previous match, if there was a match previously
$self->{'BUFFER'} = $self->{'POST'} if $self->{'MATCH'}
}

$self->{'PRE'} = "";
$self->{'MATCH'} = "";
$self->{'POST'} = "";

if (!%quote && !$escape && !$dblesc && !$self->{'FORCESLOW'}) {
print "FAST: $start, $end\n" if $debug;
return $self->_fast0() if $start eq $end;
return $self->_fast1();
}

# build the regexp that matches the next important thing

if (%quote) {
$startq = join (")|(", keys %quote);
$startq = "($startq)";
}

if ($escape || $dblesc) {
if ($escape && $dblesc) {
$specialq = "($escape)|($dblesc)";
} elsif ($escape) {
$specialq = "($escape)";
} else {
$specialq = "($dblesc)";
}
}

$_ = $self->{'BUFFER'};
$self->{'BUFFER'} = "";
while ($state != 3) {
if ($state == 0) { # before start tag
$regexp = "($start)";
$regexp .= "|$startq" if $startq;
$regexp .= "|($escape)" if $escape;
} elsif ($state == 1) { # in start tag
$regexp = "($start)|($end)";
$regexp .= "|$startq" if $startq;
$regexp .= "|($escape)" if $escape;
} elsif ($state == 2) { # in quote
$regexp = $endq;
$regexp .= "|$specialq" if $specialq;
} else {
$self->{'ERROR'} = "BAD STATE! THIS CAN'T HAPPEN!";
return;
}

print "STATE: $state: $regexp\n" if $debug;

($pre, $matched, $post) = $self->_match($regexp, $_);

print "\tSTR : $_\n" if $debug;
print "\tPRE : $pre\n" if $debug;
print "\tMTCH: $matched\n" if $debug;
print "\tPOST: $post\n" if $debug;

last if !$matched;

# First things first, if we've encountered an escaped
# character, move along
if ($escape && $self->_match ($escape, $matched)) {
$accum .= $pre . $matched;
$accum .= substr($post, 0, 1);
$_ = substr ($post, 1);
next;
}

if ($state == 0) { # looking for start or startq
if ($self->_match($start, $matched)) { # matched start
$state = 1;
$depth++;
print "START: $depth\n" if $debug;

$self->{'PRE'} = $accum . $pre;
$accum = $matched;
$_ = $post;
} else { # (must have) matched startq
push (@states, $state);
$state = 2;
$accum .= $pre . $matched;
foreach $scratch (keys %quote) {
if ($self->_match ($scratch, $matched)) {
$endq = $quote{$scratch};
last;
}
}
$_ = $post;
}
} elsif ($state == 1) {
if ($self->_match($end, $matched)) { # matched end
$state = 1;
$depth--;

print "END : $depth\n" if $debug;
$accum .= $pre . $matched;
if ($depth == 0) {
$state = 3;
$self->{'MATCH'} = $accum;
$self->{'POST'} = $post;
$_ = "";
} else {
$_ = $post;
}
} elsif ($self->_match($start, $matched)) { # matched start
$state = 1;
$depth++;
print "START: $depth\n" if $debug;

$accum .= $pre . $matched;
$_ = $post;
} else { # (must have) matched startq
push (@states, $state);
$state = 2;
$accum .= $pre . $matched;
foreach $scratch (keys %quote) {
if ($self->_match ($scratch, $matched)) {
$endq = $quote{$scratch};
last;
}
}
$_ = $post;
}
} elsif ($state == 2) {
# case 1, matched dblesc and is a doubled char
if ($dblesc
&& $self->_match ($dblesc, $matched)
&& ($matched eq substr($post, 0, 1))) { # skip forward
$accum .= $pre . $matched;
$accum .= substr($post, 0, 1);
$_ = substr($post, 1);
next;
} # otherwise check for other things then revisit

if ($self->_match ($endq, $matched)) { # matched endq
$state = pop (@states);
$accum .= $pre . $matched;
$_ = $post;
} else { # (must have) matched a undoubled dblesc
# usually this ends a quoted string
# (and we'd never get here)
# but since it didn't, just skip along
$accum .= $pre . $matched;
$_ = $post;
}
} else {
$self->{'ERROR'} = "BAD STATE! THIS CAN'T HAPPEN!";
return;
}
}

if ($state == 3) {
$pre = $self->{'PRE'};
$match = $self->{'MATCH'};
$post = $self->{'POST'};

$match = $self->strip_delim($match) if !$self->{'RETURNDELIM'};
} else {
$self->{'PRE'} = "";
$self->{'MATCH'} = "";
$self->{'POST'} = "";
undef $pre;
undef $match;
undef $post;
}

if (!$self->{'KEEP'}) {
$self->{'PRE'} = "";
$self->{'MATCH'} = "";
$self->{'POST'} = "";
}

return wantarray ? ($pre, $match, $post) : $match;
}

sub _fast0 {
my $self = shift;
my $delim = $self->{'STARTREGEXP'};
local $_ = $self->{'BUFFER'};
my ($pre, $match, $post);

if ($self->{'CASESENSE'}) {
$match = /^(.*?)($delim.*?$delim)(.*)$/s;
($pre, $match, $post) = ($1, $2, $3);
} else {
$match = /^(.*?)($delim.*?$delim)(.*)$/si;
($pre, $match, $post) = ($1, $2, $3);
}

if ($match) {
$match = $self->strip_delim($match) if !$self->{'RETURNDELIM'};

if ($self->{'KEEP'}) {
$self->{'PRE'} = $pre;
$self->{'MATCH'} = $match;
$self->{'POST'} = $post;
}

return wantarray ? ($pre, $match, $post) : $match;
} else {
return wantarray ? (undef, undef, undef) : undef;
}
}

sub _fast1 {
my $self = shift;
my $string = $self->{'BUFFER'};
my $start = $self->{'STARTREGEXP'};
my $end = $self->{'ENDREGEXP'};
my $regexp = "($start)|($end)";
my $count = 0;
my ($match, $realpre, $pre, $post, $matched);

($realpre, $match, $post) = $self->_match($start, $string);

if (defined($match)) {
$matched = $match;
$string = $post;
$count++;

($pre, $match, $post) = $self->_match($regexp, $string);

while (defined($match)) {
$matched .= $pre . $match;

if ($self->_match($end, $match)) {
$count--;
} else {
$count++;
}

$string = $post;
last if $count == 0;

($pre, $match, $post) = $self->_match($regexp, $string);
}

if ($count == 0) {
$matched = $self->strip_delim($matched) if !$self->{'RETURNDELIM'};

if ($self->{'KEEP'}) {
$self->{'PRE'} = $realpre;
$self->{'MATCH'} = $matched;
$self->{'POST'} = $post;
}

return wantarray ? ($realpre, $matched, $post) : $matched;
}
}

return wantarray ? (undef, undef, undef) : undef;
}

sub strip_delim {
my $self = shift;
my $string = shift;
my $start = $self->{'STARTREGEXP'};
my $end = $self->{'ENDREGEXP'};
my $ok = 1;
local $_ = "no -w warning in evals now";

return if $self->{'ERROR'};

$string = $self->{'MATCH'} if !defined($string);

if ($string =~ /^$start/s) {
my($rest) = $';
if ($rest =~ /^(.*)$end$/s) {
return $1;
} else {
$self->{'ERROR'} = "FAILED TO MATCH END DELIMITER";
}
} else {
$self->{'ERROR'} = "FAILED TO MATCH START DELIMITER";
}

return;
}

sub _match {
my $self = shift;
my $regexp = shift;
local $_ = shift;
my $match = 0;
my ($pre, $matched, $post);

if ($self->{'CASESENSE'}) {
$match = /$regexp/s;
($pre, $matched, $post) = ($`, $&, $');
} else {
$match = /$regexp/si;
($pre, $matched, $post) = ($`, $&, $');
}

if ($match) {
wantarray ? ($pre, $matched, $post) : $matched;
} else {
wantarray ? (undef, undef, undef) : undef;
}
}

sub nested_match {
my ($search, $start, $end, $three) = @_;
my $mc = new Text::DelimMatch $start, $end;
my ($p, $m, $s) = $mc->match($search);

if (defined($three)) {
return wantarray ? ($p, $m, $s) : $m;
} else {
return wantarray ? ("$p$m", $s) : $m;
}
}

sub skip_nested_match {
my ($search, $start, $end, $three) = @_;
my $mc = new Text::DelimMatch $start, $end;
my ($p, $m, $s) = $mc->match($search);

if (defined($three)) {
return wantarray ? ($p, $m, $s) : $s;
} else {
return wantarray ? ("$p$m", $s) : $s;
}
}

1;
__END__

=head1 NAME

Text::DelimMatch - Perl extension to find regexp delimited strings with proper nesting

=head1 SYNOPSIS

use Text::DelimMatch;

$mc = new Text::DelimMatch, $startdelim, $enddelim;

$mc->quote('"');
$mc->escape("\\");
$mc->double_escape('"');
$mc->case_sensitive(1);

($prefix, $match, $remainder) = $mc->match($string);
($prefix, $nextmatch, $remainder) = $mc->match();

$middle = $mc->strip_delim($match); # returns $match w/o start and end delim

=head1 DESCRIPTION

These routines allow you to match delimited substrings in a
buffer. The delimiters can be specified with any regular
expression and the start and end delimiters need not be the
same. If the delimited text is properly nested, entire nested
groups are returned.

In addition, you may specify quoting and escaping characters that
contribute to the recognition of start and end delimiters.

For example, if you specify the start and end delimiters as '\(' and
'\)', respectively, and the double quote character as a quoting character,
and the backslash as an escaping character, then the delimited substring
in this buffer is "(ma(t)c\)h)":

'prefix text "(quoted text)" \(escaped \" text) (ma(t)c\)h) postfix text'

In order to support this rather complex interface, the matching context
is encapsulated in an object. The object, Text::DelimMatch,
has the following public methods:

=over 4

=item new $start, $end, $escape, $dblesc, $qs1, $qe1, ... $qsn, $qen

Creates a new object. All of the arguments are optional, and can be
set with other methods, but they must be passed in the specified order:
start delimiter, end delimiter, escape characters, double escape characters,
and a set of quote characters.

=item match $string

In an array context, returns ($pre, $match, $post) where $pre is the
text preceding the first match, $match is the matched text (including
the delimiters), and $post is the rest of the text in the buffer.
In a scalar context, returns $match.

If $string is not provided on subsequent calls, the $post from the
previous match is used, unless keep is false. If keep is false, the
match always fails.

=item strip_delim $string

Returns $string with the start and end delimiters removed.

=item delim $start, $end

Set the start and end delimiters. Only one set of delimiters can be
in use at any one time.

Returns the delimters in use before this call.

=item quote $startq, $endq

Specifies the start and end quote characters. Multiple quote
character pairs are supported, so this function is additive. To
clear the current settings, pass no arguments, e.g.,
$mc->quote().

If only $start is passed, $end is assumed to be the same.

In matching, quotes occur in pairs. In other words, if (",")
and (',') are both specified as quote pairs and a string
beginning with " is found, it is ended only by another ", not by '.

Returns the quote hash in use before this call.

=item escape $esc

Specifies a set of escaping characters. This can only be a string
of characters. $esc can be a regexp set or a simple string. If it
is a simple string, it will be translated into the regexp set
"[ quotemeta($esc) ]".

Returns the escape characters in use before this call.

=item double_escape $esc

Specifies a set of double-escaping characters, i.e., characters that
are considered escaped if they occur in pairs. For example, in some
languages,

'Don''t you see?'

defines a string containing a single apostrophe.

$esc can only be a string of characters. $esc can be a regexp
set or a simple string. If it is a simple string, it will be
translated into the regexp set "[ quotemeta($esc) ]".

Returns the double-escaping characters in use before this call.

=item case_sensitive $bool

Sets case sensitivity to $bool or true if $bool is not specified.

Returns the case sensitivity in use before this call.

=item keep $bool

Sets keep to $bool or true if $bool is not specified.

Keep, which is true by default, specifies whether or not the
matching context object keeps a local copy of the buffer used in
matching. Keeping a local copy allows repeated matching on the same
buffer, but might be a bad idea if the buffer is a terabyte long. ;-)

Returns the keep setting in use before this call.

=item returndelim $bool

Sets returndelim to $bool or true if $bool is not specified.

Returndelim, which is true by default, specifies whether or not the
start and end delimiters are returned with the matching string.

Returns the returndelim setting in use before this call.

=item error $seterr

Returns the last error that occured. If $seterr is passed, the error is
set to that value. Some common kinds of bad input are detected and an
error condition is raised. If an error condition is raised, all matching
fails until the error is cleared.

The most common error is a bad regular expression, for example specifing
the start delimiter as "(" instead of "\\(". Remember, these are regexps!

=item pre_matched

Returns the prefix text from the last match if keep is true. Sets
an error and returns an empty string if keep is false.

=item matched

Returns the matched text from the last match if keep is true. Sets
an error and returns an empty string if keep is false.

=item post_matched

Returns the postfix text from the last match if keep is true. Sets
an error and returns an empty string if keep is false.

=item debug $bool

Sets debug to $bool or true if $bool is not specified.

If debug is true, informative and progress messages are printed
to STDOUT by some methods.

Returns the debugging setting in use before this call.

=item dump

For debugging, prints all of the instance variables for a particular
object.

=item slow $bool

For debugging. Some classes of delimited strings can be located
with much faster algorithms than can be used in the most general
case. If slow is true, the slower, general algorithm is always
used.

=back

For simplicity, and backward compatibility with the previous
(limited release) incarnation of this module, the following
functions are also available directly:

=over 4

=item nested_match ($string, $start, $end, $three)

If $three is true, returns ($pre, $match, $post) in an array context
otherwise returns ("$pre$match", $post). In a scalar context, returns
"$pre$match".


=item skip_nested_match ($string, $start, $end, $three)

If $three is true, returns ($pre, $match, $post) in an array context
otherwise returns ("$pre$match", $post). In a scalar context, returns
$post.

=back

=head1 EXAMPLES

$mc = new Text::DelimMatch '"';
$mc->('pre "match" post') == '"match"';

$mc->delim("\\(", "\\)");
$mc->('pre (match) post') == ('pre ', '(match)', ' post');
$mc->('pre (ma(t)ch) post') == ('pre ', '(ma(t)ch)', ' post');

$mc->quote('"');
$mc->escape("\\");
$mc->('pre (ma")"tch) post') == ('pre ', '(ma")"tch)', ' post');
$mc->('pre (ma(t)c\)h\") post') == ('pre ', '(ma(t)c\)h\")', ' post');

See also test.pl in the distribution.

=head1 AUTHOR

Norman Walsh, ndw@nwalsh.com

=head1 COPYRIGHT

Copyright (C) 1997-2002 Norman Walsh.
All rights reserved. This program is free software; you can
redistribute it and/or modify it under the same terms as Perl itself.

=head1 WARRANTY

THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
PURPOSE.

=head1 SEE ALSO

perl(1).

=cut

DelimMatch-1.06a/MANIFEST000064400000000000000000000000620766240426200147450ustar00rootroot00000000000000README
MANIFEST
Makefile.PL
DelimMatch.pm
test.pl
DelimMatch-1.06a/Makefile.PL000064400000000000000000000003650766240426200155740ustar00rootroot00000000000000use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
WriteMakefile(
'NAME' => 'Text::DelimMatch',
'VERSION_FROM' => 'DelimMatch.pm', # finds $VERSION
);
DelimMatch-1.06a/README000064400000000000000000000047760766240426200145140ustar00rootroot00000000000000README for DelimMatch version 1.06, 20 May 2003

DelimMatch allows you to match delimited substrings in a buffer.
The delimiters can be specified with any regular expression and
the start and end delimiters need not be the same. If the
delimited text is properly nested, entire nested groups are
returned.

In addition, you may specify quoting and escaping characters that
contribute to the recognition of start and end delimiters.

For more detail, see the POD documentation in DelimMatch.pm

CHANGES

2003/05/20: Released 1.06, fixed a couple of places where "/s" was missing
from the regex, preventing multi-line matches from working correctly.

2002/10/02: Released 1.05, with the plan to actually upload this one to CPAN.
No real changes from 1.04, just updates to documentation.

1999/04/14: Added returndelim() and strip_delim() methods to control
whether or not the match returned includes the start and
end delimiters. By default, delimiters are returned.

1997/10/27: Released version 1.03. Tweaked the documentation a little.
Finally uploaded to PAUSE. NOTE: NestedMatch-1.03, uploaded
on 10/25 WAS AN ERROR. Use this INSTEAD.

1997/07/03: Released version 1.02
Made -w clean

1997/07/02: Released version 1.01
Support for Perl 5.004.
The scoping of $1, $2, etc. changed!?
POD buglet
Allow more arguments to constructor

Released version 1.00
This module replaces Text::NestedMatch which was only
ever in limited release (never in CPAN).

PREREQUISITES

DelimMatch is implemented as an object. It has no requirements
beyond Perl 5.000.

INSTALLATION

This module is written entirely in Perl. There's nothing to build,
and installation should be as simple as

perl Makefile.PL
make
make test
make install

To install this module by hand, simply copy DelimMatch.pm to your
Perl library directory. Test by running perl test.pl.

CONTACTING THE AUTHOR

The best way to reach me is by email to <ndw@nwalsh.com>
You will find additional contact information at http://nwalsh.com/

COPYRIGHT

Copyright (C) 1997-2003 Norman Walsh.
All rights reserved. This program is free software; you can
redistribute it and/or modify it under the same terms as Perl itself.

WARRANTY

THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
PURPOSE.
DelimMatch-1.06a/test.pl000064400000000000000000000131670766240426200151420ustar00rootroot00000000000000# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'

######################### We start with some black magic to print on failure.

# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)

BEGIN { $| = 1; print "1..47\n"; }
END {print "not ok 1\n" unless $loaded;}
use Text::DelimMatch;
$loaded = 1;
print "ok 1\n";

######################### End of black magic.

# Insert your test code below (better if it prints "ok 13"
# (correspondingly "not ok 13") depending on the success of chunk 13
# of the test code):

$mc = new Text::DelimMatch '"';

# test: simple delimited text, fast
&test (2, 'pre "match" post', 'pre ', '"match"', ' post');
&test (3, '"match" post', '', '"match"', ' post');
&test (4, 'pre "match"', 'pre ', '"match"', '');
&test (5, 'no match" post');
&test (6, 'pre "no match');

# test: simple delimited text, slow
$mc->slow(1);
&test (7, 'pre "match" post', 'pre ', '"match"', ' post');
&test (8, '"match" post', '', '"match"', ' post');
&test (9, 'pre "match"', 'pre ', '"match"', '');
&test (10, 'no match" post');
&test (11, 'pre "no match');

# test: delimited text, fast
$mc->slow(0);
$mc->delim("\\(", "\\)");

&test (12, 'pre (m(a(t)c)h) post', 'pre ', '(m(a(t)c)h)', ' post');
&test (13, '(m(a(t)c)h) post', '', '(m(a(t)c)h)', ' post');
&test (14, 'pre (m(a(t)c)h)', 'pre ', '(m(a(t)c)h)', '');
&test (15, '(no match post');
&test (16, 'no match) post');
&test (17, 'pre (no match');
&test (18, 'pre no match)');

# test: delimited text, slow
$mc->slow(1);
&test (19, 'pre (m(a(t)c)h) post', 'pre ', '(m(a(t)c)h)', ' post');
&test (20, '(m(a(t)c)h) post', '', '(m(a(t)c)h)', ' post');
&test (21, 'pre (m(a(t)c)h)', 'pre ', '(m(a(t)c)h)', '');
&test (22, '(no match post');
&test (23, 'no match) post');
&test (24, 'pre (no match');
&test (25, 'pre no match)');

# test: delimited text, skipping quotes
$mc->slow(0);
$mc->quote('"');

&test (26, 'pre "(no match)" "(no" "match)" (match) post',
'pre "(no match)" "(no" "match)" ', '(match)', ' post');
&test (27, '"(no match)" pre (match) post',
'"(no match)" pre ', '(match)', ' post');

# test: delimited text, complex quotes
$mc->quote();
$mc->quote('<!--', '-->');
&test (28, 'pre <!-- ( --> (match) post',
'pre <!-- ( --> ', '(match)', ' post');

# test: delimited text, escaped characters
$mc->quote();
$mc->escape('\\');
&test (29, 'pre \(no match) (match) post',
'pre \(no match) ', '(match)', ' post');

&test (30, 'pre (match \) this) post',
'pre ', '(match \) this)', ' post');

# test: delimited text, doubled quotes
$mc->quote ('"');
$mc->escape();
&test (31, 'pre "(no match)" (no match)" (match) post',
'pre "(no match)" ', '(no match)', '" (match) post');

$mc->double_escape ('"');
&test (32, 'pre "(no match)"" (no match)" (match) post',
'pre "(no match)"" (no match)" ', '(match)', ' post');

# test: a little of everything

$mc->escape('[;\\]');
&test (33, ';(no \(match)\) \(no match) \"(m(a\)tc)h)',
';(no \(match)\) \(no match) \"', '(m(a\)tc)h)', '');

&test (34, '"(no match) "" (no match) \""\((m"a)t"ch)',
'"(no match) "" (no match) \""\(', '(m"a)t"ch)', '');

# test: repeated matching

&test (35, 'pre (match) (match2) post',
'pre ', '(match)', ' (match2) post');

&test (36, undef,
' ', '(match2)', ' post');

# test: case sensitivity

$mc->delim('S', 'E');

&test (37, "pre SmastechE post", "pre ", "SmastechE", " post");
&test (38, "pre smaStEche post", "pre ", "smaStEche", " post");

$mc->case_sensitive(1);

&test (39, "snomatche SmastechE post", "snomatche ", "SmastechE", " post");
&test (40, "snomatche smaStEche post", "snomatche sma", "StE", "che post");

# test: backward compatability functions

if (&Text::DelimMatch::nested_match ("pre (match) ", "\\(", "\\)") eq "(match)") {
print "ok 41\n";
} else {
print "not ok 41\n";
}

if (&Text::DelimMatch::skip_nested_match (" (match)post", "\\(", "\\)") eq "post") {
print "ok 42\n";
} else {
print "not ok 42\n";
}

# test: turning off repeated matching

$mc->keep(0);

$mc->delim("\\(", "\\)");

&test (43, 'pre (match) (match2) post',
'pre ', '(match)', ' (match2) post');

&test (44, undef);

# test: strip delimiters

$mc = new Text::DelimMatch '\(', '\)';
$mc->returndelim(1);

$match = $mc->match("test ((this is) a test) so there");

if ($match eq '((this is) a test)') {
print "ok 45\n";
} else {
print "not ok 45\n";
}

$mc->returndelim(0);

$match = $mc->match("test ((this is) a test) so there");

if ($match eq '(this is) a test') {
print "ok 46\n";
} else {
print "not ok 46 ($match)\n";
}

# test: multi-line matching with delimiter stripping

$match = $mc->match("test ((this is)\na test) so there");

if ($match eq "(this is)\na test") {
print "ok 47\n";
} else {
print "not ok 47 ($match)\n";
}

exit;

sub test {
my ($tnum, $string, $okpre, $okmatch, $okpost) = @_;
my ($pre, $match, $post) = $mc->match($string);

# check this first, to avoid -w errors
if (!defined($match) && !defined($okmatch)) {
print "ok $tnum\n";
return;
}

# print "1: ", defined($pre), " ";
# print "2: ", defined($okpre), " ";
# print "3: ", defined($match), "($match) ";
# print "4: ", defined($okmatch), " ";
# print "5: ", defined($post), " ";
# print "6: ", defined($okpost), "\n";

if (($pre ne $okpre)
|| ($match ne $okmatch)
|| ($post ne $okpost)) {
print "not ok $tnum\n";
print "\t\"$pre\" =?= \"$okpre\"\n";
print "\t\"$match\" =?= \"$okmatch\"\n";
print "\t\"$post\" =?= \"$okpost\"\n";
$mc->dump();
exit 1;
} else {
print "ok $tnum\n";
}
}

 
design & coding: Vladimir Lettiev aka crux © 2004-2005, Andrew Avramenko aka liks © 2007-2008
current maintainer: Michael Shigorin