[xsl] Enjoy! (was: Re: [xsl] Limit the length of transformed content from source text nodes)

Subject: [xsl] Enjoy! (was: Re: [xsl] Limit the length of transformed content from source text nodes)
From: Blue Gecko <bluegecko@xxxxxxxxx>
Date: Tue, 01 Nov 2005 10:40:40 +0100
I've improved on the hint suggested by David Carlisle (see below), so that snipping now works at arbitrary depth.

I'm pleased to share this humble piece of code in case someone else needs such functionality and perhaps wants to improve it further. As a reminder, this stylesheet aims to emulate the shrinking mechanism commonly used in content summaries, where the bodies of available articles are snipped to their first words followed by an ellipsis (...).

The current main limitation is that it performs just an identity transformation, so I had to arrange a double-pass transformation in order to manipulate my content semantics.

(Many thanks to David Carlisle)

==============

<!-- INPUT -->
<foo>
<content>
<section>
<title>1st section</title>
<content>
This is the <strong>1st part</strong> of an article interspersed with <strong>markup tags</strong> I'm striving to snip.
</content>
</section>
<section>
<title>2nd section</title>
<content>
<i>This is the <strong>2nd part</strong> of an article</i> interspersed with <strong>markup tags</strong> I'm striving to snip.
</content>
</section>
</content>
</foo>


==============

<!-- XSLT -->
<xsl:stylesheet
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
 version="1.0"
 >
 <!--
  Snipping identity transformation.

  Copies the children of foo/content into a <root> wrapper, walking the
  tree one node at a time (first child, then next sibling) while keeping
  a running count of the text characters seen so far. As soon as a text
  node would exceed $snipSize, only the part that still fits is written,
  followed by "[...]", and the walk stops.

  Lengths are measured on normalize-space() of each node, and text is
  written through normalize-space() as well. That function also removes
  leading and trailing whitespace of every text node, so a space that
  separates a text node from an adjacent inline element is not
  reproduced in the result.
 -->

 <!-- Maximum number of text characters written before snipping. -->
 <xsl:variable name="snipSize" select="25"/>

 <!-- Entry point: start the walk at the first child of foo/content. -->
 <xsl:template match="/">
  <root>
   <xsl:apply-templates mode="snippet" select="foo/content/node()[1]"/>
  </root>
 </xsl:template>

 <!--
  Processes a single element or text node.

  Parameter currentLength: number of characters already accounted for
  by the nodes processed before this one (defaults to 0).
 -->
 <xsl:template match="*|text()" mode="snippet">
  <xsl:param name="currentLength" select="0"/>

  <!-- Running length once the whole current node has been counted. -->
  <xsl:variable name="newLength" select="string-length(normalize-space(.)) + $currentLength"/>

  <xsl:choose>
   <!-- Text node -->
   <xsl:when test="self::text()">
    <xsl:choose>
     <!-- Does the text fit in the remaining room? -->
     <xsl:when test="$newLength &lt;= $snipSize">
      <!-- Yes: write it out completely. -->
      <xsl:value-of select="normalize-space(.)"/>

      <!-- Move on to the next sibling (horizontal axis). -->
      <xsl:apply-templates select="following-sibling::node()[1]" mode="snippet">
       <xsl:with-param name="currentLength" select="$newLength"/>
      </xsl:apply-templates>
     </xsl:when>
     <!-- The remaining room is not enough. -->
     <xsl:otherwise>
      <!-- Write only the part that fits; no further node is visited. -->
      <xsl:value-of select="substring(normalize-space(.), 1, $snipSize - $currentLength)"/>
      <xsl:text>[...]</xsl:text>
     </xsl:otherwise>
    </xsl:choose>
   </xsl:when>
   <!-- Any other kind of node matched by this template (an element) -->
   <xsl:otherwise>
    <!-- Identity copy of the element and its attributes. -->
    <xsl:copy>
     <xsl:copy-of select="@*"/>

     <!--
      Descend into the content (vertical axis). The children start from
      the length reached before this element, not from $newLength.
     -->
     <xsl:apply-templates select="child::node()[1]" mode="snippet">
      <xsl:with-param name="currentLength" select="$currentLength"/>
     </xsl:apply-templates>
    </xsl:copy>

    <!-- Continue with the siblings only if the whole element fitted. -->
    <xsl:if test="$newLength &lt;= $snipSize">
     <!-- Move on to the next sibling (horizontal axis). -->
     <xsl:apply-templates select="following-sibling::node()[1]" mode="snippet">
      <xsl:with-param name="currentLength" select="$newLength"/>
     </xsl:apply-templates>
    </xsl:if>
   </xsl:otherwise>
  </xsl:choose>
 </xsl:template>
</xsl:stylesheet>


==============

<!-- OUTPUT -->
<?xml version="1.0" encoding="UTF-8"?>
<root>
 <section>
  <title>1st section</title>
  <content>This is the<strong>1st[...]</strong></content>
 </section>
</root>

==============

David Carlisle wrote:
if you need to snip at arbitrary depth it gets harder, this just adds
complete elements until you are too long or snips top level text

==============

<foo>
<item>This is the <strong>body</strong> of an article interspersed with <strong>markup tags</strong> I'm striving to snip.</item>
</foo>


==============

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <!--
    Builds one <snippet> per <item>. Child nodes of an item are added one
    after another: child elements are added only as a whole, while a text
    node that is a direct child of the item may be cut. Output ends with
    "[...]" at the point where the character budget $tl is exceeded.
  -->

  <!-- Character budget for a snippet. -->
  <xsl:variable name="tl" select="26"/>

  <!-- Wrap the results for all items. -->
  <xsl:template match="foo">
    <snippets>
      <xsl:apply-templates/>
    </snippets>
  </xsl:template>

  <!-- Start the budgeted walk at the first child node of the item. -->
  <xsl:template match="item">
    <snippet>
      <xsl:apply-templates mode="s" select="node()[1]"/>
    </snippet>
  </xsl:template>

  <!-- Regular (unmoded) rendering of inline markup: strong becomes i. -->
  <xsl:template match="strong">
    <i>
      <xsl:apply-templates/>
    </i>
  </xsl:template>

  <!--
    Child element of an item.
    Parameter l: number of characters already written (defaults to 0).
    The element is first rendered into a variable using the unmoded
    templates; it is emitted only if its complete text still fits.
  -->
  <xsl:template match="*" mode="s">
    <xsl:param name="l" select="0"/>
    <xsl:variable name="x">
      <xsl:apply-templates select="."/>
    </xsl:variable>
    <!-- Length of the text contained in the rendered element. -->
    <xsl:variable name="xl" select="string-length($x)"/>
    <xsl:choose>
      <xsl:when test="$l + $xl &lt;= $tl">
        <xsl:copy-of select="$x"/>
        <xsl:apply-templates select="following-sibling::node()[1]" mode="s">
          <xsl:with-param name="l" select="$l + $xl"/>
        </xsl:apply-templates>
      </xsl:when>
      <xsl:otherwise>
        <!-- Elements are never cut: drop it and stop here. -->
        <xsl:text>[...]</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <!--
    Text node that is a direct child of an item.
    Parameter l: number of characters already written (defaults to 0).
  -->
  <xsl:template match="text()" mode="s">
    <xsl:param name="l" select="0"/>
    <xsl:variable name="xl" select="string-length(.)"/>
    <xsl:choose>
      <xsl:when test="$l + $xl &lt;= $tl">
        <xsl:value-of select="."/>
        <xsl:apply-templates select="following-sibling::node()[1]" mode="s">
          <xsl:with-param name="l" select="$l + $xl"/>
        </xsl:apply-templates>
      </xsl:when>
      <xsl:otherwise>
        <!--
          Keep exactly the characters that still fit: the budget minus
          what has already been written. The length of this text node
          plays no part in that amount.
        -->
        <xsl:value-of select="substring(., 1, $tl - $l)"/>
        <xsl:text>[...]</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>

==============

$ saxon snip.xml snip.xsl
<?xml version="1.0" encoding="utf-8"?><snippets>
<snippet>This is the <i>body</i> of an a[...]</snippet>
</snippets>

Current Thread