RE: [xsl] complex transform vs. generated transform performance..

Subject: RE: [xsl] complex transform vs. generated transform performance..
From: "Michael Kay" <mike@xxxxxxxxxxxx>
Date: Mon, 6 Jun 2005 09:45:40 +0100
Haven't got time to give you the code, but you need to define a key that
allows you to replace

<xsl:value-of select="$record/*[local-name(.) = current()/@name]"/>

with a call on the key() function.

Michael Kay
http://www.saxonica.com/ 

> -----Original Message-----
> From: Brian Chrisman [mailto:incubus@xxxxxxxxxxxxxx] 
> Sent: 05 June 2005 23:53
> To: xsl-list@xxxxxxxxxxxxxxxxxxxxxx
> Subject: [xsl] complex transform vs. generated transform performance..
> 
> Hello All,
>  
> I slapped together a quick XSLt script to convert an XML document 
> with a simple flat structure into a postgres-optimized 
> table-load script.
> I had a schema document (XSD), so I used that to defined the 
> transformation,
> but I also used the assumption that the schema was a simple 
> 2-level document.
> 
> The XML is a reasonable size .. 19MB.
> My first attempt is a stylesheet which dynamically used the 
> information
> from the XSD to pull the fields from the XML.  This was 
> taking > 30 minutes 
> to run. 
> It seemed to work, though I only ran it on a few similarly 
> formatted test
> documents. (The smallest of which, I'm including below)
> (I used sablotron... XSLT1.0..  xsltproc/libxslt was taking 
> much longer)
> 
> In the second attempt, I converted that stylesheet to one 
> which generated
> a specific stylesheet for that XML, which did the 
> transformation in 30 seconds.
> (The results diff'ed to be identical, minus one newline).
> 
> In general, I lean towards 1-stage processes over 2-stage 
> processes when
> I don't see a real usefulness to the intermediate stage (ie. 
> modularizing
> functionality).
> 
> If anybody spots a way to optimize this first stylesheet, I'd 
> appreciate it.
> 
> If the schema document I'm including below is too big or 
> verbose, I could
> find a way to cut the example down a bit... in general 
> though, it's just
> a flat space, like the XML example below it.
> 
> Thanks,
> Brian Chrisman
> 
> 
> (hopefully my long lines or other formatting irregularities 
> don't cause a 
> huge problem here..)
> -- xsd2insertgenerator.xsl
> <xsl:stylesheet 
> 	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"; 
> 	xmlns:xsd="http://www.w3.org/2001/XMLSchema"; 
> 	xmlns:od="urn:schemas-microsoft-com:officedata"
> 	exclude-result-prefixes="xsd od"
> 	version="1.0">
> 	<xsl:output method="text" omit-xml-declaration="yes"/>
> 
> 	<!-- just imports the data, does not create the table.. etc -->
> 
> 	<xsl:variable name="schema" 
> select="document('/www/MAD/MAD.xsd')"/>
> 	<xsl:variable name="table" 
> select="$schema/xsd:schema/xsd:element[1]//@ref"/>
> 	<xsl:variable name="element-list" 
> select="$schema/xsd:schema/xsd:element[@name=$schema/xsd:schem
> a/xsd:element[1]//@ref]//xsd:element"/>
> 
> 	<!-- some fields may exist *only* in the schema document -->
> 	<xsl:variable name="fields" 
> select="$schema/xsd:schema/xsd:element[@name=/xsd:schema/xsd:e
> lement[1]//@ref]//xsd:element"/>
> 
> 	<xsl:template match="/">
> 		<xsl:value-of select="concat('COPY &quot;', 
> $table, '&quot; FROM stdin;&#xA;')"/>
> 		<xsl:apply-templates select="/dataroot/*" 
> mode="one-record"/>  <!-- Flat space imports only -->
> 		<xsl:text>\.</xsl:text>
> 	</xsl:template>
> 
> 	<xsl:template match="*" mode="one-record">
> 		<xsl:variable name="record" select="."/>
> 		<!-- Note: below I am looping over the entries 
> in the XSD doc, and looking up corresponding values in the 
> XML record -->
> 		<xsl:for-each select="$element-list">
> 			<xsl:value-of 
> select="$record/*[local-name(.) = current()/@name]"/>
> 			<xsl:if test="position() &lt; 
> last()"><xsl:text>&#x9;</xsl:text></xsl:if>
> 		</xsl:for-each>
> 		<xsl:text>&#xA;</xsl:text>
> 	</xsl:template>
> 		
> </xsl:stylesheet>
> 
> -- xsd2insertgeneratorgenerator.xsl
> <xsl:stylesheet
> 	version="1.0"
> 	xmlns:xsl="http://www.w3.org/1999/XSL/Transform";
> 	xmlns:gen="dummy-namespace-for-the-generated-xslt"
> 	xmlns:xsd="http://www.w3.org/2001/XMLSchema"; 
> 	xmlns:od="urn:schemas-microsoft-com:officedata"
> 	exclude-result-prefixes="xsd od xsl">
> 	<xsl:output method="xml" indent="yes"/>
> 	
> 	<xsl:namespace-alias stylesheet-prefix="gen" 
> result-prefix="xsl"/>
> 	<xsl:variable name="schema" 
> select="document('/www/MAD/MAD.xsd')"/>
> 	<xsl:variable name="table" 
> select="$schema/xsd:schema/xsd:element[1]//@ref"/>
> 	<xsl:variable name="element-list" 
> select="$schema/xsd:schema/xsd:element[@name=$schema/xsd:schem
> a/xsd:element[1]//@ref]//xsd:element"/>
> 
> 	<xsl:template match="/">
> 		<gen:stylesheet version="1.0">
> 		<gen:output method="txt" omit-xml-declaration="yes"/>
> 
> 			<gen:template match="/">
> 				<gen:value-of 
> select="concat('COPY &quot;', '{$table}', '&quot; FROM 
> stdin;&#xA;')"/>
> 				<gen:apply-templates 
> select="/dataroot/{$table}"/>  <!-- Flat space imports only -->
> 				<gen:text>\.</gen:text>
> 				<gen:for-each select="{$table}">
> 					<xsl:for-each 
> select="$element-list">
> 						<gen:value-of 
> select="{@name}"/>
> 						<xsl:if 
> test="position() &lt; last()">
> 							
> <gen:text><xsl:value-of select="'&#x9;'"/></gen:text>
> 						</xsl:if>
> 					</xsl:for-each>
> 					<gen:text><xsl:value-of 
> select="'&#xA;'"/></gen:text>
> 				</gen:for-each>
> 			</gen:template>
> 
> <!--
> 			<gen:template match="{$table}">
> 				<xsl:for-each select="$element-list">
> 					<gen:value-of select="{@name}"/>
> 					<xsl:if 
> test="position() &lt; last()">
> 						
> <gen:text><xsl:value-of select="'&#x9;'"/></gen:text>
> 					</xsl:if>
> 				</xsl:for-each>
> 				<gen:text><xsl:value-of 
> select="'&#xA;'"/></gen:text>
> 			</gen:template>
> -->
> 		</gen:stylesheet>
> 	</xsl:template>
> </xsl:stylesheet>
> 
> 
> 
> -- MAD.xsd
> <?xml version="1.0" encoding="UTF-8"?>
> <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"; 
> xmlns:od="urn:schemas-microsoft-com:officedata">
>   <xsd:element name="dataroot">
>     <xsd:complexType>
>       <xsd:sequence>
>         <xsd:element ref="MAD" minOccurs="0" maxOccurs="unbounded"/>
>       </xsd:sequence>
>       <xsd:attribute name="generated" type="xsd:dateTime"/>
>     </xsd:complexType>
>   </xsd:element>
>   <xsd:element name="MAD">
>     <xsd:annotation>
>       <xsd:appinfo/>
>     </xsd:annotation>
>     <xsd:complexType>
>       <xsd:sequence>
>         <xsd:element name="ADDRESS_ID" minOccurs="1" 
> od:jetType="double" od:sqlSType="float" od:nonNullable="yes" 
> type="xsd:double"/>
>         <xsd:element name="APN" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="14"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="PREFIX_TYPE" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="15"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="PREFIX_DIR" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="5"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ST_NUMBER" minOccurs="0" 
> od:jetType="double" od:sqlSType="float" type="xsd:double"/>
>         <xsd:element name="ST_NAME" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="30"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ST_TYPE" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="SUFFIX_DIR" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="5"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="CITY" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="25"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ZIP" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="LONG_ADDRESS" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="100"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ADDR_FRACTION" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="5"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="UNIT_DESIGNATOR" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="UNIT" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="FLOOR" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="5"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="BUILDING" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="30"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ADDRESS_TYPE" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ADDRESS_STATUS" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="20"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ADDRESS_USE" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="25"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="PRIVATE" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="INCORPORATED" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="X_COORD" minOccurs="0" 
> od:jetType="double" od:sqlSType="float" type="xsd:double"/>
>         <xsd:element name="Y_COORD" minOccurs="0" 
> od:jetType="double" od:sqlSType="float" type="xsd:double"/>
>         <xsd:element name="DATE_ENTERED" minOccurs="0" 
> od:jetType="datetime" od:sqlSType="datetime" type="xsd:dateTime"/>
>         <xsd:element name="DATE_UPDATED" minOccurs="0" 
> od:jetType="datetime" od:sqlSType="datetime" type="xsd:dateTime"/>
>         <xsd:element name="DATE_RETIRED" minOccurs="0" 
> od:jetType="datetime" od:sqlSType="datetime" type="xsd:dateTime"/>
>         <xsd:element name="SOURCE" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="30"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="BUS_LIC_FLAG" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="SERC_REQ_FLAG" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="15"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="PROJECT_FLAG" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="15"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="PERMIT_FLAG" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="15"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="ZIP4_FLAG" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="1"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>         <xsd:element name="COORD_SOURCE" minOccurs="0" 
> od:jetType="text" od:sqlSType="nvarchar">
>           <xsd:simpleType>
>             <xsd:restriction base="xsd:string">
>               <xsd:maxLength value="10"/>
>             </xsd:restriction>
>           </xsd:simpleType>
>         </xsd:element>
>       </xsd:sequence>
>     </xsd:complexType>
>   </xsd:element>
> </xsd:schema>
> 
> -- foo.xml (test data)
> <?xml version="1.0" encoding="UTF-8"?>
> <dataroot xmlns:od="urn:schemas-microsoft-com:officedata" 
> xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"; 
> xsi:noNamespaceSchemaLocation="MAD.xsd" 
> generated="2005-04-29T15:13:49">
>   <MAD>
>     <ADDRESS_ID>1</ADDRESS_ID>
>     <APN>22400900350000</APN>
>     <ST_NUMBER>7453</ST_NUMBER>
>     <ST_NAME>TIARA</ST_NAME>
>     <ST_TYPE>WAY</ST_TYPE>
>     <CITY>ORANGEVALE</CITY>
>     <ZIP>95662</ZIP>
>     <LONG_ADDRESS>7453 TIARA WAY       B</LONG_ADDRESS>
>     <UNIT>B</UNIT>
>     <X_COORD>6768867.42298</X_COORD>
>     <Y_COORD>2018154.73453</Y_COORD>
>     <DATE_ENTERED>2005-04-19T00:00:00</DATE_ENTERED>
>     <SOURCE>GISDROP_20050225</SOURCE>
>     <BUS_LIC_FLAG>N</BUS_LIC_FLAG>
>     <SERC_REQ_FLAG>N</SERC_REQ_FLAG>
>     <PROJECT_FLAG>N</PROJECT_FLAG>
>     <PERMIT_FLAG>Y</PERMIT_FLAG>
>     <ZIP4_FLAG>N</ZIP4_FLAG>
>     <COORD_SOURCE>CENTROID</COORD_SOURCE>
>   </MAD>
>   <MAD>
>     <ADDRESS_ID>8</ADDRESS_ID>
>     <APN>24301920130000</APN>
>     <ST_NUMBER>6054</ST_NUMBER>
>     <ST_NAME>SUNRISE</ST_NAME>
>     <ST_TYPE>MALL</ST_TYPE>
>     <CITY>CITRUS HEIGHTS</CITY>
>     <ZIP>95610</ZIP>
>     <LONG_ADDRESS>6054 SUNRISE MALL       B7</LONG_ADDRESS>
>     <UNIT>B7</UNIT>
>     <X_COORD>6770356.0616</X_COORD>
>     <Y_COORD>2007415.14451</Y_COORD>
>     <DATE_ENTERED>2005-04-19T00:00:00</DATE_ENTERED>
>     <SOURCE>GISDROP_20050225</SOURCE>
>     <BUS_LIC_FLAG>N</BUS_LIC_FLAG>
>     <SERC_REQ_FLAG>N</SERC_REQ_FLAG>
>     <PROJECT_FLAG>N</PROJECT_FLAG>
>     <PERMIT_FLAG>Y</PERMIT_FLAG>
>     <ZIP4_FLAG>Y</ZIP4_FLAG>
>     <COORD_SOURCE>CENTROID</COORD_SOURCE>
>   </MAD>
>   <MAD>
>     <ADDRESS_ID>9</ADDRESS_ID>
>     <APN>24301500230000</APN>
>     <ST_NUMBER>7669</ST_NUMBER>
>     <ST_NAME>GREENBACK</ST_NAME>
>     <ST_TYPE>LN</ST_TYPE>
>     <ZIP>0</ZIP>
>     <LONG_ADDRESS>7669 GREENBACK LN       2002</LONG_ADDRESS>
>     <UNIT>2002</UNIT>
>     <X_COORD>6766774.11111</X_COORD>
>     <Y_COORD>2010059.02536</Y_COORD>
>     <DATE_ENTERED>2005-04-19T00:00:00</DATE_ENTERED>
>     <SOURCE>GISDROP_20050225</SOURCE>
>     <BUS_LIC_FLAG>N</BUS_LIC_FLAG>
>     <SERC_REQ_FLAG>N</SERC_REQ_FLAG>
>     <PROJECT_FLAG>N</PROJECT_FLAG>
>     <PERMIT_FLAG>Y</PERMIT_FLAG>
>     <ZIP4_FLAG>N</ZIP4_FLAG>
>     <COORD_SOURCE>CENTROID</COORD_SOURCE>
>   </MAD>
> </dataroot>

Current Thread