[xsl] How to make this script faster

Subject: [xsl] How to make this script faster
From: "Mathieu Malaterre" <mathieu.malaterre@xxxxxxxxx>
Date: Thu, 15 Nov 2007 14:36:01 +0100
Hi there,

  I have a working version of an XSLT script:
http://gdcm.svn.sourceforge.net/viewvc/gdcm/Sandbox/xslt/2/

  See (*) and (**). What I would like to do is :

1. Be able to run the xslt in one pass. For now I have to run it with
<xsl:param name="extract-section" select="'C.1'"/>
then edit test.xsl file, comment the line and uncomment:
<!--xsl:param name="extract-section" select="'C.2'"/-->
and so on and so forth...

2. This script is seriously *slow*. I guess running it in one pass
should solve most of the problem, but if there is something obvious I
am missing, please let me know... thanks!

-Mathieu

(*)
$ cat test.xml
<?xml version="1.0"?>
<article>
  <para>C.1 Title 1</para>
  <para>info for section C.1</para>
  <informaltable>table1</informaltable>
  <para>C.2 Title 2</para>
  <informaltable>table2</informaltable>
  <para>info for section C.2</para>
  <para>C.2.1 Title 2.1</para>
  <para>text for section C.2.1</para>
  <para>text for section C.2.1 again</para>
  <para>C.2.2 Tile 2.2</para>
  <informaltable>table for 2.2</informaltable>
  <para>text for section C.2.2</para>
</article>

(**)
$ cat test.xsl
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Extracts the section whose number is given by the extract-section
  parameter from an article made of flat para / informaltable siblings.
  The namespace declarations below must not be followed by ';' characters:
  anything other than whitespace between attributes makes the start tag
  ill-formed and the stylesheet cannot be parsed.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:fn="http://www.w3.org/2005/xpath-functions"
                version="2.0">

<!-- GENERAL -->

<!-- Serialize the result tree as indented UTF-8 XML. -->
<xsl:output method="xml" indent="yes" encoding="UTF-8"/>

<!-- number of the sample section to be extracted -->
<!-- (alternative values kept for reference; only one xsl:param may be active) -->
<!--xsl:param name="extract-section" select="'C.1'"/-->
<!--xsl:param name="extract-section" select="'C.2'"/-->
<!--xsl:param name="extract-section" select="'C.2.1'"/-->
<xsl:param name="extract-section" select="'C.2.2'"/>


<!-- Wraps a para in a <text> element: its string value followed by a line feed. -->
<xsl:template match="para">
  <text>
    <xsl:value-of select="."/>
    <!-- trailing newline emitted explicitly instead of via concat() -->
    <xsl:text>&#10;</xsl:text>
  </text>
</xsl:template>

<!-- Wraps an informaltable in a <table> element: its string value followed by a line feed. -->
<xsl:template match="informaltable">
  <table>
    <xsl:value-of select="."/>
    <!-- trailing newline emitted explicitly instead of via concat() -->
    <xsl:text>&#10;</xsl:text>
  </table>
</xsl:template>

<!-- MAIN -->

<!--
  Entry point. Looks for the single para child of article whose normalized
  text starts with the requested section number followed by a space, then
  emits a <section ref="..." name="..."> element containing the content
  that follows that heading (delegated to copy-section-paragraphs).
  Reports via xsl:message when the heading is missing or ambiguous; in
  those cases no <section> element is produced.
-->
<xsl:template match="/article">
  <!-- The trailing space keeps 'C.2' from also matching 'C.2.1 ...'. -->
  <xsl:variable name="section-number" select="concat($extract-section,' ')"/>
  <xsl:variable name="section-anchor"
                select="para[starts-with(normalize-space(.),$section-number)]"/>
  <!-- Counted once and reused by both tests below. -->
  <xsl:variable name="anchor-count" select="count($section-anchor)"/>
  <xsl:choose>
    <xsl:when test="$anchor-count = 1">
      <xsl:message>Info: section <xsl:value-of
select="$extract-section"/> found</xsl:message>
      <xsl:element name="section">
        <xsl:attribute name="ref" select="$extract-section"/>
        <!--
          The title is taken from the anchor already found instead of
          scanning every para a second time. Computing it only in this
          branch also guarantees substring-after() receives exactly one
          item; with several matching headings it would otherwise be
          handed a multi-item sequence, which is a type error in XPath 2.0.
        -->
        <xsl:attribute name="name"
                       select="normalize-space(substring-after($section-anchor,$extract-section))"/>
        <xsl:call-template name="copy-section-paragraphs">
          <xsl:with-param name="section-paragraphs"
                          select="$section-anchor/following-sibling::*"/>
        </xsl:call-template>
      </xsl:element>
      <xsl:message>Info: all paragraphs extracted</xsl:message>
    </xsl:when>
    <xsl:when test="$anchor-count > 1">
      <xsl:message>Error: section <xsl:value-of
select="$extract-section"/> found multiple times!</xsl:message>
    </xsl:when>
    <xsl:otherwise>
      <xsl:message>Error: section <xsl:value-of
select="$extract-section"/> not found!</xsl:message>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!-- TEMPLATES -->

<!--
  Named template: copy-section-paragraphs

  Parameter:
    section-paragraphs  sequence of elements that follow a section heading.

  Applies templates, one element at a time and in sequence order, to the
  leading run of $section-paragraphs, stopping before the first element that
    * is neither a para nor an informaltable, or
    * looks like the next section title (normalized text matching the
      heading pattern below).
  Produces nothing when the sequence is empty or its first element is
  already a stop element.

  The previous implementation recursed once per element and rebuilt the
  remaining sequence with [position()>1] on every step. This version finds
  the stop position first and then walks the prefix, so the sequence is
  not re-sliced for every paragraph.
-->
<xsl:template name="copy-section-paragraphs">
  <xsl:param name="section-paragraphs"/>
  <!-- Section title: a letter A-F or a number, one or more dotted numeric parts, then a space. -->
  <xsl:variable name="heading-pattern"
                select="'^([A-F]|[1-9]+[0-9]?)(\.[1-9]?[0-9]+)+ '"/>
  <!-- Index of the first element that ends the section; empty if there is none. -->
  <xsl:variable name="stop-index"
                select="(for $i in 1 to count($section-paragraphs)
                         return if (not(name($section-paragraphs[$i]) = ('para', 'informaltable'))
                                    or fn:matches(normalize-space($section-paragraphs[$i]), $heading-pattern))
                                then $i
                                else ()
                        )[1]"/>
  <xsl:variable name="section-body"
                select="if (exists($stop-index))
                        then subsequence($section-paragraphs, 1, $stop-index - 1)
                        else $section-paragraphs"/>
  <!-- output each paragraph of the section (the matched templates close it with a newline) -->
  <xsl:for-each select="$section-body">
    <!-- Applied per item so position() and last() inside the matched template stay 1, as before. -->
    <xsl:apply-templates select="."/>
  </xsl:for-each>
</xsl:template>

</xsl:stylesheet>



-- 
Mathieu

Current Thread