[xsl] flat file transformation: Splitting and merging of OpenOffice 1.1 Documents

Subject: [xsl] flat file transformation: Splitting and merging of OpenOffice 1.1 Documents
From: Xsl-list <Xsl-list@xxxxxxxxxxxxx>
Date: Fri, 22 Aug 2003 16:55:24 +0200
Hi Charles,
finally I came to a solution for my
Splitting-Open-Office-1.1-Document-problem.
As you said before, some XPath expressions in the stylesheet were too
restrictive.
Here is the final stylesheet. I used the redirect extension of Xalan.
The stylesheet writes all elements that come before the first chapter (e.g.
style information), together with a list of all chapters, to a file named
"kopfdaten.xml".
Then it creates XML files for the chapters (without the elements that
precede chapter 1) named e.g. "kapitel-3.xml".
These files can then be processed with other tools, in my case for doing
translations.
Merging the resulting XML files to get back a valid OpenOffice 1.1 doc is
easy now, because I can use the list of all chapters in the file
"kopfdaten.xml".
Thank You for helping me out!

To get a valid XML instance simply unzip a *.SXW file (OpenOffice 1.1
format) and extract content.xml from there. 
*********************************************************************
SPLIT
*********************************************************************
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  SPLIT stylesheet for an OpenOffice 1.1 content.xml.

  The "xalan" prefix is bound to the Xalan Redirect extension and is listed
  in extension-element-prefixes so that xalan:write is treated as an
  extension instruction instead of a literal result element.

  The office:class / office:version attributes are foreign-namespace
  attributes on xsl:stylesheet; XSLT 1.0 permits them. NOTE(review): they
  look like a carry-over from the content.xml root element; confirm whether
  they are needed here.
-->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xalan="org.apache.xalan.xslt.extensions.Redirect"
  extension-element-prefixes="xalan"
  xmlns:office="http://openoffice.org/2000/office"
  xmlns:style="http://openoffice.org/2000/style"
  xmlns:text="http://openoffice.org/2000/text"
  xmlns:table="http://openoffice.org/2000/table"
  xmlns:draw="http://openoffice.org/2000/drawing"
  xmlns:fo="http://www.w3.org/1999/XSL/Format"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:number="http://openoffice.org/2000/datastyle"
  xmlns:svg="http://www.w3.org/2000/svg"
  xmlns:chart="http://openoffice.org/2000/chart"
  xmlns:dr3d="http://openoffice.org/2000/dr3d"
  xmlns:math="http://www.w3.org/1998/Math/MathML"
  xmlns:form="http://openoffice.org/2000/form"
  xmlns:script="http://openoffice.org/2000/script"
  office:class="text"
  office:version="1.0"
  xmlns:meta="http://openoffice.org/2000/meta"
  xmlns:dc="http://purl.org/dc/elements/1.1/">

  <!--
    Serialisation settings for the result: XML, no added indentation,
    UTF-8, with the OpenOffice 1.0 document type declaration.
  -->
  <xsl:output method="xml" indent="no" encoding="UTF-8"
              doctype-public="-//OpenOffice.org//DTD OfficeDocument 1.0//EN"
              doctype-system="office.dtd"/>
  <!--
    Strip whitespace-only text nodes only inside the structural containers.
    Stripping everywhere (elements="*") would also delete a whitespace-only
    text node between two inline elements inside a paragraph, i.e. real
    document text.
  -->
  <xsl:strip-space elements="office:document-content office:body"/>
 
  <!-- Entry point: hand every office:body element found in the input to its template. -->
  <xsl:template match="/">
    <xsl:apply-templates select="/descendant::office:body"/>
  </xsl:template>

  <!--
    Document body: dispatch only the level-1 headings, each of which starts
    a chapter. Any other child processed from here without a chapter number
    yields no output, so selecting the headings directly gives the same
    result without visiting every body child.
  -->
  <xsl:template match="office:body">
    <xsl:apply-templates select="text:h[@text:level='1']"/>
  </xsl:template>

  <!--
    Chapter splitter, run once per level-1 heading.

    For the first chapter only, writes "kopfdaten.xml" containing
      * everything that precedes office:body in the source document,
      * every element that precedes the first level-1 heading inside
        office:body, and
      * a <files> list with one <file> entry per chapter (built by the
        named template "dateiliste").

    For every chapter, writes "kapitel-N.xml" (N = 1-based chapter number)
    containing the heading itself and all following siblings up to, but not
    including, the next level-1 heading.

    The chapter content is copied directly instead of applying templates to
    all following siblings. Applying templates would re-enter this template
    for every later level-1 heading and rewrite the later chapter files
    again and again.
  -->
  <xsl:template match="text:h[@text:level='1']">
    <!-- 1-based chapter number: earlier level-1 headings among the siblings, plus one. -->
    <xsl:variable name="kap-num"
                  select="count(preceding-sibling::text:h[@text:level='1']) + 1"/>

    <xsl:if test="$kap-num = 1">
      <xalan:write select="concat('kopfdaten','.xml')">
        <office:document-content>
          <xsl:copy-of select="/office:document-content/office:body/preceding-sibling::*"/>
          <office:body>
            <!-- Relative to this heading, so a lower-level heading that
                 precedes chapter 1 is kept as part of the preface. -->
            <xsl:copy-of select="preceding-sibling::*"/>
            <files>
              <xsl:for-each select="//text:h[@text:level='1']">
                <xsl:call-template name="dateiliste">
                  <!-- Number of chapters from this heading to the end, inclusive. -->
                  <xsl:with-param name="anzahl"
                                  select="count(following-sibling::text:h[@text:level='1']) + 1"/>
                </xsl:call-template>
              </xsl:for-each>
            </files>
          </office:body>
        </office:document-content>
      </xalan:write>
    </xsl:if>

    <xalan:write select="concat('kapitel-',$kap-num,'.xml')">
      <office:document-content>
        <office:body>
          <xsl:copy-of select="."/>
          <!-- Nodes of this chapter: exactly $kap-num level-1 headings
               precede them. The next chapter heading has the same count,
               so it is excluded explicitly. -->
          <xsl:copy-of select="following-sibling::node()
                                 [not(self::text:h[@text:level='1'])]
                                 [count(preceding-sibling::text:h[@text:level='1']) = $kap-num]"/>
        </office:body>
      </office:document-content>
    </xalan:write>
  </xsl:template>

  <!--
    Catch-all for nodes that no more specific template matches.

    Parameter:
      num  chapter number the caller is currently writing (optional).

    The node is copied only when the number of level-1 headings among its
    preceding siblings equals $num, i.e. when it belongs to that chapter.
    Without $num the parameter is the empty string, the numeric comparison
    is false, and nothing is output.

    The heading test uses the namespace-aware node test text:h instead of
    comparing name() with 'text:h', so it does not depend on the prefix
    used in the source document.
  -->
  <xsl:template match="node()">
    <xsl:param name="num" />
    <xsl:variable name="parent-kap"
                  select="count(preceding-sibling::text:h[@text:level='1'])" />
    <xsl:if test="$num = $parent-kap">
      <xsl:copy-of select="." />
    </xsl:if>
  </xsl:template>

  <!--
    Named template "dateiliste": emits one <file> entry holding a chapter
    file name of the form kapitel-N.xml.

    Parameter:
      anzahl  value subtracted from (number of level-1 headings in the
              document + 1) to obtain N. When it equals '0', no entry is
              written.
  -->
  <xsl:template name="dateiliste">
    <xsl:param name="anzahl" />
    <!-- Count of all level-1 headings in the document, plus one. -->
    <xsl:variable name="obergrenze"
                  select="count(/descendant::text:h[@text:level='1']) + 1" />
    <xsl:if test="not($anzahl = '0')">
      <file>
        <xsl:text>kapitel-</xsl:text>
        <xsl:value-of select="$obergrenze - $anzahl" />
        <xsl:text>.xml</xsl:text>
      </file>
    </xsl:if>
  </xsl:template>

</xsl:stylesheet>
*********************************************************************
MERGE
*********************************************************************
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  MERGE stylesheet: applied to the header file that carries the <files>
  list, it rebuilds a single document from the listed chapter files.

  The office:class / office:version attributes are foreign-namespace
  attributes on xsl:stylesheet; XSLT 1.0 permits them. NOTE(review): they
  look like a carry-over from the content.xml root element; confirm whether
  they are needed here.
-->
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xalan="org.apache.xalan.xslt.extensions.Redirect"
  extension-element-prefixes="xalan"
  xmlns:office="http://openoffice.org/2000/office"
  xmlns:style="http://openoffice.org/2000/style"
  xmlns:text="http://openoffice.org/2000/text"
  xmlns:table="http://openoffice.org/2000/table"
  xmlns:draw="http://openoffice.org/2000/drawing"
  xmlns:fo="http://www.w3.org/1999/XSL/Format"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:number="http://openoffice.org/2000/datastyle"
  xmlns:svg="http://www.w3.org/2000/svg"
  xmlns:chart="http://openoffice.org/2000/chart"
  xmlns:dr3d="http://openoffice.org/2000/dr3d"
  xmlns:math="http://www.w3.org/1998/Math/MathML"
  xmlns:form="http://openoffice.org/2000/form"
  xmlns:script="http://openoffice.org/2000/script"
  office:class="text"
  office:version="1.0"
  xmlns:meta="http://openoffice.org/2000/meta"
  xmlns:dc="http://purl.org/dc/elements/1.1/">

  <!--
    Serialisation settings for the merged document: XML, no added
    indentation, UTF-8, with the OpenOffice 1.0 document type declaration.
  -->
  <xsl:output method="xml" indent="no" encoding="UTF-8"
              doctype-public="-//OpenOffice.org//DTD OfficeDocument 1.0//EN"
              doctype-system="office.dtd"/>
  <!--
    Strip whitespace-only text nodes only inside the structural containers.
    Whitespace stripping also applies to documents loaded with document(),
    so stripping everywhere (elements="*") would delete a whitespace-only
    text node between two inline elements inside a paragraph of the chapter
    files, i.e. real document text.
  -->
  <xsl:strip-space elements="office:document-content office:body"/>
  
  <!--
    Identity rule: reproduce each attribute and node as it is and continue
    with its attributes and children, so that more specific templates can
    replace individual parts of the tree.
  -->
  <xsl:template match="node() | @*">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!--
    Replaces the <files> list with the body content of every listed
    chapter file, in list order.

    * Only the <file> children of the matched <files> element are read,
      so each list is expanded exactly once.
    * Entries that are empty or whitespace-only are skipped; an empty URI
      would resolve to the list document itself.
    * The file name is whitespace-normalised and resolved relative to the
      document that contains the <file> element (second argument of
      document()).
  -->
  <xsl:template match="files">
    <xsl:for-each select="file[normalize-space()]">
      <xsl:copy-of
          select="document(normalize-space(.), .)/office:document-content/office:body/*"/>
    </xsl:for-each>
  </xsl:template>

</xsl:stylesheet>
*********************************************************************

 XSL-List info and archive:  http://www.mulberrytech.com/xsl/xsl-list


Current Thread