Re: [xsl] grouping and word counting

Subject: Re: [xsl] grouping and word counting
From: "Martin Rowlinson" <marrow@xxxxxxxxxxxxxx>
Date: Sat, 19 Jul 2003 18:19:38 +0100
Hi Marina,

"marina" <marina777uk@xxxxxxxxx> wrote...
> 1) Find out how many messages over all were sent to 1,
> 2, 3 etc people.
> 
> As a duplicated message will always follow the
> original, i.e. be the next <MESSAGE> tag of the
> following sibling node, I'm thinking that the
> stylesheet would start with the first message and keep comparing
> siblings until it found one that was different. Then it would just add
> the previous number of sibling nodes? (I probably need to use keys?)

I think you will need to do that as a first task - i.e. restructure the
XML so that the duplicated messages become one message.  That task, in
isolation, might look something like...

== XSL1 ===========================================
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Collapses each run of consecutive, identical SENT children of LOG into a
  single <msg> element.

  Two SENT elements are treated as the same message when the concatenation
  USER|LOCATION|MESSAGE is equal for both.  For every run the output holds:
    msg-text      the MESSAGE text after normalize-space()
    target-count  how many consecutive SENT elements make up the run
    USER/LOCATION copies of the run's first SENT, for identification
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="LOG">
    <output>
      <!-- A SENT starts a new run when it has no preceding SENT sibling, or
           when its signature differs from the immediately preceding SENT.
           The explicit not(...) test keeps a first SENT whose three fields
           are all empty; without it that node would compare equal to the
           empty signature of a non-existent predecessor and be skipped. -->
      <xsl:for-each select="SENT[not(preceding-sibling::SENT)
                                 or concat(USER, '|', LOCATION, '|', MESSAGE)
                                    != concat(preceding-sibling::SENT[1]/USER, '|',
                                              preceding-sibling::SENT[1]/LOCATION, '|',
                                              preceding-sibling::SENT[1]/MESSAGE)]">
        <!-- signature of the SENT that starts this run -->
        <xsl:variable name="signature"
                      select="concat(USER, '|', LOCATION, '|', MESSAGE)"/>
        <msg>
          <msg-text>
            <!-- normalize-space() trims the ends and collapses inner runs
                 of whitespace to single spaces -->
            <xsl:value-of select="normalize-space(MESSAGE)"/>
          </msg-text>
          <!-- id of the first later SENT with a different signature, i.e. the
               start of the next run; the empty string when this run reaches
               the end of the log -->
          <xsl:variable name="next-msg-id"
                        select="generate-id(following-sibling::SENT
                                  [concat(USER, '|', LOCATION, '|', MESSAGE) != $signature][1])"/>
          <target-count>
            <!-- A following SENT belongs to this run exactly when the first
                 differing SENT after it is the same node as the one found
                 for the run start. -->
            <xsl:value-of select="count(. | following-sibling::SENT
                                    [generate-id(following-sibling::SENT
                                       [concat(USER, '|', LOCATION, '|', MESSAGE) != $signature][1])
                                     = $next-msg-id])"/>
          </target-count>
          <!-- copy some of the info, just so that you can see which message
               is which -->
          <xsl:copy-of select="USER | LOCATION"/>
        </msg>
      </xsl:for-each>
    </output>
  </xsl:template>
</xsl:stylesheet>
== end of XSL1 ====================================

Then you could use that restructured XML (in one stylesheet - using
result tree fragments, RTFs, converted with the EXSLT node-set()
extension) to create the final table, e.g.

== XSL2 ===========================================
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Produces an HTML table with one row per distinct group size (number of
  consecutive identical SENT entries), showing how many messages had that
  group size and the average crude word count of those messages.

  Step 1: rebuild LOG/SENT into <msg> elements (one per run of duplicates)
          inside a result tree fragment.
  Step 2: convert the fragment to a node-set with exslt:node-set().
  Step 3: group the <msg> elements by target-count using a key and emit one
          table row per group.

  Requires a processor that supports the EXSLT common node-set() function.
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:exslt="http://exslt.org/common"
                exclude-result-prefixes="exslt">
  <!-- exclude-result-prefixes keeps the exslt namespace declaration off the
       generated html element -->
  <xsl:output method="html" indent="yes"/>

  <!-- key for finding distinct target counts -->
  <xsl:key name="kDistinctTargetCount" match="msg" use="target-count"/>

  <!-- re-build XML so that each run of duplicate messages becomes one msg -->
  <xsl:variable name="rtf-distinct-msgs">
    <!-- A SENT starts a new run when it has no preceding SENT sibling, or
         when its USER|LOCATION|MESSAGE signature differs from that of the
         immediately preceding SENT.  The explicit not(...) test keeps a
         first SENT whose three fields are all empty. -->
    <xsl:for-each select="LOG/SENT[not(preceding-sibling::SENT)
                                   or concat(USER, '|', LOCATION, '|', MESSAGE)
                                      != concat(preceding-sibling::SENT[1]/USER, '|',
                                                preceding-sibling::SENT[1]/LOCATION, '|',
                                                preceding-sibling::SENT[1]/MESSAGE)]">
      <!-- signature of the SENT that starts this run -->
      <xsl:variable name="signature"
                    select="concat(USER, '|', LOCATION, '|', MESSAGE)"/>
      <msg>
        <msg-text>
          <!-- normalize-space() trims the ends and collapses inner runs of
               whitespace to single spaces -->
          <xsl:value-of select="normalize-space(MESSAGE)"/>
        </msg-text>
        <!-- id of the first later SENT with a different signature; the empty
             string when this run reaches the end of the log -->
        <xsl:variable name="next-msg-id"
                      select="generate-id(following-sibling::SENT
                                [concat(USER, '|', LOCATION, '|', MESSAGE) != $signature][1])"/>
        <target-count>
          <!-- count the run start plus every following SENT whose first
               differing successor is that same node -->
          <xsl:value-of select="count(. | following-sibling::SENT
                                  [generate-id(following-sibling::SENT
                                     [concat(USER, '|', LOCATION, '|', MESSAGE) != $signature][1])
                                   = $next-msg-id])"/>
        </target-count>
      </msg>
    </xsl:for-each>
  </xsl:variable>

  <!-- convert RTF to node-set -->
  <xsl:variable name="distinct-msgs"
                select="exslt:node-set($rtf-distinct-msgs)/msg"/>

  <xsl:template match="/">
    <html>
      <body>
        <table border="1">
          <tr>
            <th>
              <xsl:text>Group Size</xsl:text>
            </th>
            <th>
              <xsl:text>Number of Messages</xsl:text>
            </th>
            <th>
              <xsl:text>Av Number Words</xsl:text>
            </th>
          </tr>
          <!-- Muenchian grouping: keep only the first msg for each distinct
               target-count, then process those in ascending numeric order -->
          <xsl:apply-templates
              select="$distinct-msgs[generate-id()
                        = generate-id(key('kDistinctTargetCount', target-count)[1])]">
            <xsl:sort select="target-count" data-type="number"/>
          </xsl:apply-templates>
        </table>
      </body>
    </html>
  </xsl:template>

  <!-- Emits one table row for the group represented by this msg. -->
  <xsl:template match="msg">
    <!-- get the messages that have this same target count -->
    <xsl:variable name="also-msgs"
                  select="key('kDistinctTargetCount', target-count)"/>
    <!-- count them -->
    <xsl:variable name="count-also-msgs" select="count($also-msgs)"/>
    <tr>
      <td>
        <xsl:value-of select="target-count"/>
      </td>
      <td>
        <xsl:value-of select="$count-also-msgs"/>
      </td>
      <!-- get word counts -->
      <xsl:variable name="rtf-word-counts">
        <xsl:for-each select="$also-msgs">
          <word-count>
            <!-- Crude word count: number of whitespace separators (length of
                 the text minus its length with whitespace removed), plus one
                 for the final word.  The "plus one" is applied only when the
                 text is non-empty, so an empty message counts as 0 words. -->
            <xsl:value-of select="string-length(msg-text)
                                  - string-length(translate(msg-text, ' &#10;&#13;&#09;', ''))
                                  + number(string-length(msg-text) &gt; 0)"/>
          </word-count>
        </xsl:for-each>
      </xsl:variable>
      <td>
        <!-- average = total words in the group / number of messages -->
        <xsl:value-of
            select="sum(exslt:node-set($rtf-word-counts)/word-count)
                    div $count-also-msgs"/>
      </td>
    </tr>
  </xsl:template>
</xsl:stylesheet>
== end of XSL2 ====================================

Hope this helps
Marrow
http://www.marrowsoft.com - home of Xselerator (XSLT IDE and debugger)
http://www.topxml.com/Xselerator




 XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list


Current Thread