[xsl] Natural sort of strings

Subject: [xsl] Natural sort of strings
From: "Vladimir Nesterovsky" <vladimir@xxxxxxxxxxxxxxxxxxxx>
Date: Thu, 24 Feb 2011 02:56:28 -0800
Hello!

I needed to sort strings in "natural" order. That means that I needed 
output like this:

"item1/1"
"item2/1"
"item2/2"
"item2/12"
"item3"
"item4"
"item5"
"item6"
"item7"
"item8"
"item9"
"item10/1"
"item11"
"item12"
"item13"
"item14"
"item15"
"item16"
"item17"
"item18"
"item19"
"item20"

The task looks simple but my implementation is surprisingly untrivial.
Is there simple one?

<xsl:stylesheet version="2.0" 
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform";
  xmlns:xs="http://www.w3.org/2001/XMLSchema";
  xmlns:t="http://www.nesterovsky-bros.com/xslt/public";
  exclude-result-prefixes="t xs">

  <xsl:template match="/" name="main">
    <xsl:variable name="items" as="xs:string*">
      <xsl:for-each select="1 to 20">
        <xsl:sequence select="concat('item', .)"/>
      </xsl:for-each>

      <xsl:sequence select="'item1/1'"/>
      <xsl:sequence select="'item10/1'"/>
      <xsl:sequence select="'item2/1'"/>
      <xsl:sequence select="'item2/2'"/>
      <xsl:sequence select="'item2/12'"/>
    </xsl:variable>

    <xsl:variable name="regular-sort" as="xs:string*">
      <xsl:perform-sort select="$items">
        <xsl:sort select="." order="ascending"/>
      </xsl:perform-sort>
    </xsl:variable>

    <xsl:variable name="natural-sort" as="xs:string*"
      select="t:natural-sort($regular-sort, true())"/>

    <xsl:message>
      <xsl:text>Regular sort:
</xsl:text>

      <xsl:for-each select="$regular-sort">
        <xsl:value-of select="."/>
        <xsl:text>
</xsl:text>
      </xsl:for-each>

      <xsl:text>
Natural sort:
</xsl:text>

      <xsl:for-each select="$natural-sort">
        <xsl:value-of select="."/>
        <xsl:text>
</xsl:text>
      </xsl:for-each>
    </xsl:message>
  </xsl:template>

  <!--
    Sorts strings in "natural" order.
      $values - values to sort.
      $ascending - true for ascending, and false for descending order.
      Returns an ordered sequence of values.
  -->
  <xsl:function name="t:natural-sort" as="xs:string*">
    <xsl:param name="values" as="xs:string*"/>
    <xsl:param name="ascending" as="xs:boolean"/>

    <xsl:variable name="indices" as="xs:integer*"
      select="t:natural-sort-indices($values, $ascending, false())"/>

    <xsl:sequence select="
      for $i in $indices return
        $values[$i]"/>
  </xsl:function>

  <!--
    Natural sort implementation.
      $values - values to sort.
      $ascending - true for ascending, and false for descending order.
      $number - true for a number, and false for a non number start 
prefix.
      Returns an ordered sequence of value indices.
  -->
  <xsl:function name="t:natural-sort-indices" as="xs:integer*">
    <xsl:param name="values" as="xs:string*"/>
    <xsl:param name="ascending" as="xs:boolean"/>
    <xsl:param name="number" as="xs:boolean"/>

    <xsl:for-each-group select="1 to count($values)" 
      group-by="
        for 
          $i in .,
          $value in $values[$i]
        return
        (
          if ($number) then
            t:number-prefix($value)
          else
            t:non-number-prefix($value)
        )">
      
      <xsl:sort 
        select="
          if ($number) then
            xs:integer(current-grouping-key())
          else
            current-grouping-key()"
        order="{
          if ($ascending) then
            'ascending'
          else
            'descending'
          }"/>

      <xsl:variable name="start" as="xs:integer" 
        select="string-length(current-grouping-key()) + 1"/>
      <xsl:variable name="group" as="xs:integer+" 
select="current-group()"/>

      <xsl:choose>
        <xsl:when test="count($group) = 1">
          <xsl:sequence select="$group"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:variable name="next" as="xs:integer+" select="
            t:natural-sort-indices
            (
              (
                for $i in $group return
                  substring($values[$i], $start)
              ),
              $ascending,
              not($number)
            )"/>

          <xsl:sequence select="
            for $i in $next return
              $group[$i]"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each-group>
  </xsl:function>

  <!--
    Gets a number prefix for a value.
      $value - a value to get prefix for.
      Returns a value prefix.
  -->
  <xsl:function name="t:number-prefix" as="xs:string?">
    <xsl:param name="value" as="xs:string"/>

    <xsl:variable name="parts" as="xs:string*">
      <xsl:analyze-string select="$value" regex="^[0-9]+">
        <xsl:matching-substring>
          <xsl:sequence select="."/>
        </xsl:matching-substring>
      </xsl:analyze-string>
    </xsl:variable>

    <xsl:sequence select="$parts[1]"/>
  </xsl:function>

  <!--
    Gets a non number prefix for a value.
      $value - a value to get prefix for.
      Returns a value prefix.
  -->
  <xsl:function name="t:non-number-prefix" as="xs:string?">
    <xsl:param name="value" as="xs:string"/>

    <xsl:variable name="parts" as="xs:string*">
      <xsl:analyze-string select="$value" regex="^[^0-9]+">
        <xsl:matching-substring>
          <xsl:sequence select="."/>
        </xsl:matching-substring>
      </xsl:analyze-string>
    </xsl:variable>

    <xsl:sequence select="$parts[1]"/>
  </xsl:function>

</xsl:stylesheet>

Thanks
--
Vladimir Nesterovsky
http://www.nesterovsky-bros.com/

Current Thread