Re: [xsl] Processing two documents, which order?

Subject: Re: [xsl] Processing two documents, which order?
From: Wolfgang Laun <wolfgang.laun@xxxxxxxxx>
Date: Fri, 8 Apr 2011 12:39:55 +0200
OK, it isn't XSLT, but it produces an optimized regex for matching any
number of words.
Change the lines after __DATA__, or modify the reading to take stdin.
.W

#! /usr/bin/perl -w
use strict;

package Trie;

# A minimal character trie whose only purpose is to turn a set of words
# into one regular expression in which common prefixes are factored out.
#
# Each node is a blessed hash with three fields:
#   letter - the character on the edge leading to this node
#   word   - true if the path from the root to this node spells a word
#   tries  - hash mapping the next character to the child node

# Trie->new( $letter )
# Create an empty node labelled $letter. The label of the root node is
# never emitted by regex(), so any value will do there.
sub new {
    my( $class, $letter ) = @_;
    return bless { letter => $letter, word => 0, tries => {} }, $class;
}

# $node->getTrie( $letter )
# Return the child node for $letter, creating it if it does not exist yet.
sub getTrie {
    my( $self, $letter ) = @_;
    my $tries = $self->{tries};
    if( !exists( $tries->{$letter} ) ){
        # Direct method call rather than the ambiguous "new Trie(...)".
        $tries->{$letter} = Trie->new( $letter );
    }
    return $tries->{$letter};
}

# $node->insertWord( $word )
# Add $word below this node, one character per level, and flag the final
# node as a word end. Inserting the empty string flags this node itself.
sub insertWord {
    my( $self, $word ) = @_;
    my $node = $self;
    for my $letter ( split( //, $word ) ){
        $node = $node->getTrie( $letter );
    }
    $node->{word} = 1;
    return;
}

# $node->dumpTrie()
# Debugging aid: print every edge letter below this node, one per line and
# followed by a colon, in depth-first order with siblings sorted.
sub dumpTrie {
    my( $self ) = @_;
    for my $letter ( sort keys( %{$self->{tries}} ) ){
        print $letter, ":\n";
        $self->{tries}->{$letter}->dumpTrie();
    }
}

# _escapeLetter( $letter )
# Private helper: backslash-escape $letter if it is a regex metacharacter,
# so that it is matched literally. Only the ASCII metacharacters are
# touched; every other character (or byte) is passed through unchanged.
sub _escapeLetter {
    my( $letter ) = @_;
    $letter =~ s/([\\|.?*+()\[\]{}^\$])/\\$1/g;
    return $letter;
}

# $node->regex()
# Return a regex fragment matching exactly the word suffixes stored below
# this node; the empty string is returned for a node without children.
#
# Alternatives are sorted by letter. If this node itself ends a word, an
# empty alternative is appended LAST: engines that try alternatives from
# left to right then prefer the longer word ("seal" over "sea") when the
# pattern is used unanchored, while the set of matched words is unchanged.
# Plain capturing parentheses are emitted on purpose.
# NOTE(review): presumably the consumer is an XSLT 2.0 regex, which has no
# "(?:...)" - confirm before switching to non-capturing groups.
sub regex {
    my( $self ) = @_;
    my $tries = $self->{tries};
    my @keys = sort keys( %{$tries} );
    return '' unless @keys;

    my @branches =
        map { _escapeLetter( $_ ) . $tries->{$_}->regex() } @keys;
    push( @branches, '' ) if $self->{word};

    # Parentheses are needed only when there is an actual alternation.
    my $res = join( '|', @branches );
    return @branches > 1 ? "($res)" : $res;
}


package main;

# Read one word per line from the DATA section. Trailing whitespace
# (including a CR from DOS line ends) is stripped and blank lines are
# skipped: inserting an empty word would flag the root node as a word and
# make the generated regex match the empty string.
my @words;
while( my $line = <DATA> ){
    $line =~ s/\s+\z//;
    next if $line eq '';
    push( @words, $line );
}

## print join( '-', @words ), "\n";

# Build the trie. Insertion order does not matter, because regex() sorts
# the alternatives itself.
my $trie = Trie->new( "root" );

for my $word ( @words ){
    $trie->insertWord( $word );
}

# Emit the combined regex on stdout.
my $regex = $trie->regex();
print  "$regex\n";

__DATA__
bee
bonnet
bounce
bounty
burn
burst
sea
seal




On 8 April 2011 12:11, Dave Pawson <davep@xxxxxxxxxxxxx> wrote:
> On Fri, 08 Apr 2011 11:06:52 +0100
> David Carlisle <davidc@xxxxxxxxx> wrote:
>
>> On 08/04/2011 11:00, Dave Pawson wrote:
>> > Efficiency isn't an issue. The alternative is hand editing.
>> > 15 minutes run time is good with that sort of comparison
>>
>> then why the question which started this thread, which is purely an
>> efficiency question?
>
> Sorry, I was thinking of efficiency wrt hand editing.
>
>
>>
>> An alternative, as I suggested, isn't hand editing but doing all
>> the properties at once in a single template with a single regex, and
>> just starting saxon once.
>
> I really couldn't contemplate writing such a regex to be honest David.
>
>
>
> --
>
> regards
>
> --
> Dave Pawson
> XSLT XSL-FO FAQ.
> http://www.dpawson.co.uk

Current Thread