RE: [xsl] Using key to group large documents

Subject: RE: [xsl] Using key to group large documents
From: "Michael Kay" <mhk@xxxxxxxxx>
Date: Thu, 18 Mar 2004 10:13:17 -0000
You've got four nested loops here:

   <xsl:for-each
       select="//person[generate-id(.)=generate-id(key('cat',class-number)[1])]">

      <xsl:for-each select="$uniqueMembersByCat[class-number=$theCat]">

         <xsl:for-each select="$contractsByMemberAndCat[class-number=$theCat and
                               person-number=$thePerson]">
            <xsl:apply-templates
                select="//bulk-benefit-item[person/class-number=$theCat and 
                        person/person-number=$thePerson and 
                        person/contract-number=$theContract]" />

and as far as I can tell, each of them is iterating over a node-set whose
size is proportional to the size of the data-set (this might not be true,
you might have a fixed number of contracts/categories, I can't tell).

I would have thought that each of the three inner loops would benefit from
using a key, since each of them uses a predicate with an "=" condition. In
fact, you seem to have defined the keys, but you don't seem to be using
them.

Michael Kay 

# -----Original Message-----
# From: owner-xsl-list@xxxxxxxxxxxxxxxxxxxxxx 
# [mailto:owner-xsl-list@xxxxxxxxxxxxxxxxxxxxxx] On Behalf Of 
# Heinz Seldte
# Sent: 18 March 2004 06:38
# To: 'xsl-list@xxxxxxxxxxxxxxxxxxxxxx'
# Subject: RE: [xsl] Using key to group large documents
# 
# ------------------------------
# 
# Date: Wed, 17 Mar 2004 10:01:11 -0000
# From: "Michael Kay" <mhk@xxxxxxxxx>
# Subject: RE: [xsl] Using key to group large documents
# 
# You've only shown us one line of your code. It's almost 
# certainly possible to improve it, but you will have to 
# publish it first.
# 
# Michael Kay
# ---------------------------------
# The detail is quite a lot - I trimmed the input to only 
# contain enough to get the idea across.
# I include my attempt at grouping the information using many 
# keys. The result is correctly grouped (Category, Member, 
# Contract, Benefit) but I still have some performance problems 
# on large (2Meg+) files.
# 
# XML:
# ==============================================================
# [XML start] <?xml version='1.0' ?> <bulk-benefit-statement>
#     <bulk-benefit>
#         <bulk-benefit-items>
#             <bulk-benefit-item>
#                 <person>
#                     <person-number>20108</person-number>
#                     <contract-number>100</contract-number>
#                     <class-number>1</class-number>
#                     <firstname>Jack</firstname>
#                 </person>
#                 <benefit>
#                     <lots-of-elements>foo</lots-of-elements>
#                 </benefit>
#             </bulk-benefit-item>
# 
#             <bulk-benefit-item>
#                 <person>
#                     <person-number>20108</person-number>
#                     <contract-number>100</contract-number>
#                     <class-number>1</class-number>
#                     <firstname>Jack</firstname>
#                 </person>
#                 <benefit>
#                     <lots-of-elements>foo</lots-of-elements>
#                 </benefit>
#             </bulk-benefit-item>
#         
#             <bulk-benefit-item>
#                 <person>
#                     <person-number>20108</person-number>
#                     <contract-number>100</contract-number>
#                     <class-number>2</class-number>
#                     <firstname>Jill</firstname>
#                 </person>
#                 <benefit>
#                     <lots-of-elements>foo</lots-of-elements>
#                 </benefit>
#             </bulk-benefit-item>
#         
#          </bulk-benefit-items>
#     </bulk-benefit>
# </bulk-benefit-statement>
# ==============================================================
# [XML stop] And then the XSL:
# ==============================================================
# [XSL start] <xsl:stylesheet version="1.0"
# xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
#    <xsl:output method="xml" encoding="UTF-8" version="1.0"
# omit-xml-declaration="no" standalone="yes" indent="yes" />
# 
#    <xsl:key name="cat" match="//person" use="class-number" />
# 
#    <xsl:key name="per" match="//person"
# use="concat(class-number,concat(person-number, contract-number))" />
# 
#    <xsl:key name="membersByCat" match="//person"
# use="concat(person-number,class-number)" />
# 
#    <xsl:key name="contracts" match="//person" use="contract-number" />
# 
#    <xsl:variable name="uniqueMembersByCat"
# select="//person[generate-id(.)=generate-id(key('membersByCat',
# concat(person-number,class-number))[1])]" />
# 
#    <xsl:variable name="uniqueContracts"
# select="//person[generate-id(.)=generate-id(key('contracts',
# contract-number)[1])]" />
# 
#    <xsl:variable name="contractsByMemberAndCat"
# select="//person[generate-id(.)=generate-id(key('per',
# concat(class-number,concat(person-number, contract-number)))[1])]" />
# 
#    <xsl:template match="bulk-benefit-statement">
#       <display>
#          <categories>
#             <xsl:for-each
# select="//person[generate-id(.)=generate-id(key('cat',
# class-number)[1])]">
#                <xsl:variable name="theCat" select="class-number" />
#                <category>
#                   <xsl:attribute name="category">
#                      <xsl:value-of select="class-number" />
#                   </xsl:attribute>
#                   <xsl:for-each
# select="$uniqueMembersByCat[class-number=$theCat]">
#                      <xsl:variable name="thePerson" 
# select="person-number"
# />
#                      <member>
#                         <xsl:attribute name="number">
#                            <xsl:value-of select="person-number" />
#                         </xsl:attribute>
#                         <contracts>
#                            <xsl:for-each 
# select="$contractsByMemberAndCat[class-number=$theCat and 
# person-number=$thePerson]">
#                               <xsl:variable name="theContract"
# select="contract-number" />
#                               <contract>
#                                  <xsl:attribute name="number">
#                                     <xsl:value-of 
# select="contract-number"
# />
#                                  </xsl:attribute>
#                                  <!-- Now keep the stuff I need for the items -->
#                                  <xsl:apply-templates 
# select="//bulk-benefit-item[person/class-number=$theCat and 
# person/person-number=$thePerson and 
# person/contract-number=$theContract]" />
#                               </contract>
#                            </xsl:for-each>
#                         </contracts>
#                      </member>
#                   </xsl:for-each>
#                </category>
#             </xsl:for-each>
#          </categories>
#       </display>
#    </xsl:template>
# 
# 
#    <xsl:template match="bulk-benefit-item">
#       <xsl:if test="position() = 1">
#           <!-- here I copy all the elements that are repeated 
# in the input for all benefits -->
#       </xsl:if>
#       <!--Then I grab the benefit -->
#       <xsl:copy-of select="benefit" />
#    </xsl:template>
#    
# 
# </xsl:stylesheet>
# ==============================================================
# [XSL stop]
# 
# Any suggestions will be happily received!!
# 
# Regards,
# Heinz Seldte
# 
#  XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list
# 


 XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list


Current Thread