Re: [xsl] Creating ZIP archive from folder

Subject: Re: [xsl] Creating ZIP archive from folder
From: "Michael Müller-Hillebrand mmh@xxxxxxxxx" <xsl-list-service@xxxxxxxxxxxxxxxxxxxxxx>
Date: Sat, 28 Nov 2020 16:46:54 -0000
Okay,

Below is my little Saturday joy. A few notes:
* I have not tested this on Windows.
* I moved the namespace declarations to the functions to increase
portability.
* I read about saxon:do in
https://www.saxonica.com/documentation9.9/index.html#!extensions/functions/expath
and used it when writing the binary. I am not sure whether that is the
correct way to use it, or if I should have used it more often.
* Because of saxon:do you need at least Saxon 9.9
* To avoid any problems with the concept of "current working directory" I
expect an absolute path as parameter

Feedback is very welcome.

- Michael

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xmlns:dy="http://www.docufy.de/XSL/functions"
  xmlns:saxon="http://saxon.sf.net/"
  extension-element-prefixes="saxon"
  exclude-result-prefixes="#all"
  version="3.0">

  <!-- Folder whose content gets zipped; pass an absolute path.
       The default below is only a sample location. -->
  <xsl:param name="folder-with-content" as="xs:string"
    select="'/Users/michael/Downloads/someFolder'"/>

  <!--
    Entry point of the stylesheet: reports the configured folder, zips its
    content with dy:zip-folder-content and reports the outcome via xsl:message.
  -->
  <xsl:template name="xsl:initial-template">
    <xsl:message select="'Folder: ', $folder-with-content"/>
    <!-- dy:zip-folder-content returns either the path of the written archive
         or a string starting with 'WARN ' or 'ERROR '. -->
    <xsl:variable name="outcome" as="xs:string"
      select="dy:zip-folder-content($folder-with-content)"/>
    <xsl:choose>
      <xsl:when test="matches($outcome, '^(WARN|ERROR) ')">
        <!-- Nothing was written, so do not label the outcome as "Created". -->
        <xsl:message select="$outcome"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:message select="'Created: ', $outcome"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <!--
    Zips the content of a folder and writes the archive into that same folder.

    $folder-path: path of the folder. Backslashes are turned into slashes and
      one trailing slash is dropped before the path is used.
    Returns the path of the written archive, or a message string:
      'WARN Nothing to zip: …' when no entry is left after dy:skip-file filtering,
      'ERROR Not a folder: …'  when the path is not a directory.
  -->
  <xsl:function name="dy:zip-folder-content" as="xs:string"
    xmlns:file="http://expath.org/ns/file">
    <xsl:param name="folder-path" as="xs:string"/>
    <!-- Separate name instead of shadowing the parameter. -->
    <xsl:variable name="normalized-path" as="xs:string"
      select="$folder-path => replace('\\', '/') => replace('/$', '')"/>
    <xsl:choose>
      <xsl:when test="file:is-dir($normalized-path)">
        <!-- Recursive listing, filtered, with slashes as separators. -->
        <xsl:variable name="all-files" as="xs:string*"
          select="file:list($normalized-path, true())[not(dy:skip-file(.))] ! replace(., '\\', '/')"/>
        <xsl:choose>
          <xsl:when test="exists($all-files)">
            <xsl:variable name="zip-content" as="xs:base64Binary"
              select="dy:content-from-files($normalized-path, $all-files)"/>
            <xsl:variable name="archive-path" as="xs:string"
              select="dy:archive-path($normalized-path)"/>
            <!-- saxon:do evaluates the expression purely for its side effect
                 (writing the archive); it needs Saxon 9.9 or later. -->
            <saxon:do action="file:write-binary($archive-path, $zip-content)"/>
            <xsl:sequence select="$archive-path"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:sequence select="'WARN Nothing to zip: ' || $normalized-path"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:when>
      <xsl:otherwise>
        <xsl:sequence select="'ERROR Not a folder: ' || $normalized-path"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

  <!--
    Builds the binary archive content for a list of entries.

    $folder-path: folder the entries in $files are relative to.
    $files: relative entry names; they are used unchanged as the entry names
      inside the archive.
    Returns the archive produced by arch:create.
  -->
  <xsl:function name="dy:content-from-files" as="xs:base64Binary"
    xmlns:file="http://expath.org/ns/file"
    xmlns:arch="http://expath.org/ns/archive">
    <xsl:param name="folder-path" as="xs:string"/>
    <xsl:param name="files" as="xs:string*"/>
    <!-- Exactly one content item per entry, in the same order as $files:
         directories contribute empty content, files their bytes. -->
    <xsl:variable name="content" as="xs:base64Binary*" select="
      for $f in $files ! ($folder-path || '/' || .) return
        if (file:is-dir($f))
        then xs:base64Binary('')
        else file:read-binary($f)"/>
    <xsl:sequence select="arch:create($files, $content)"/>
  </xsl:function>

  <!--
    Tells whether a listed path is left out of the archive.
    True when the path string starts with '.' or contains '.DS_Store' anywhere.
    NOTE(review): only the start of the whole string is tested for '.', so a
    dot-file below a subfolder (e.g. 'sub/.hidden') is kept; confirm this is
    intended.
  -->
  <xsl:function name="dy:skip-file" as="xs:boolean">
    <xsl:param name="fname" as="xs:string"/>
    <xsl:sequence select="starts-with($fname, '.') or contains($fname, '.DS_Store')"/>
  </xsl:function>

  <!--
    Rules (based on the behavior of macOS Keka.app https://www.keka.io/):
    * With a single folder content (file or folder), the archive gets the
      name of that content
    * With multiple folder contents, the archive gets the name of the parent
      folder
    * In case of duplicates, " 2", " 3" etc. are added recursively
  -->

  <!--
    Chooses the path of the archive to create inside $folder-path.

    $folder-path: folder being zipped; its last '/'-separated segment is the
      fallback archive name.
    The archive is named after the only top-level entry when exactly one entry
    remains after dy:skip-file filtering, otherwise after the folder itself.
    The three-argument overload then picks a name that does not exist yet.
  -->
  <xsl:function name="dy:archive-path" as="xs:string"
    xmlns:file="http://expath.org/ns/file">
    <xsl:param name="folder-path" as="xs:string"/>
    <!-- Non-recursive listing: only the direct children count here. -->
    <xsl:variable name="root-files" as="xs:string*"
      select="file:list($folder-path, false())[not(dy:skip-file(.))] ! replace(., '\\', '/')"/>
    <!-- A trailing slash on the single entry is stripped before it becomes
         part of the file name. -->
    <xsl:variable name="archive-name" as="xs:string" select="
      if (count($root-files) = 1)
      then replace($root-files[1], '/$', '')
      else tokenize($folder-path, '/')[last()]"/>
    <xsl:sequence select="dy:archive-path($folder-path, $archive-name, 1)"/>
  </xsl:function>

  <!--
    Returns the first archive path in $folder-path that does not exist yet.

    $folder-path: folder that receives the archive.
    $archive-name: base name without the '.zip' extension.
    $copy-count: counter to try first; 1 means no suffix, n > 1 appends ' n'.
    Candidates are '<folder>/<name>.zip', '<folder>/<name> 2.zip', and so on;
    the function calls itself with the next counter while the candidate exists.
  -->
  <xsl:function name="dy:archive-path" as="xs:string"
    xmlns:file="http://expath.org/ns/file">
    <xsl:param name="folder-path" as="xs:string"/>
    <xsl:param name="archive-name" as="xs:string"/>
    <xsl:param name="copy-count" as="xs:integer"/>
    <xsl:variable name="suffix" as="xs:string"
      select="if ($copy-count > 1) then ' ' || $copy-count else ''"/>
    <xsl:variable name="candidate" as="xs:string"
      select="$folder-path || '/' || $archive-name || $suffix || '.zip'"/>
    <xsl:sequence select="
      if (file:exists($candidate))
      then dy:archive-path($folder-path, $archive-name, $copy-count + 1)
      else $candidate"/>
  </xsl:function>

</xsl:stylesheet>


> Am 27.11.2020 um 20:37 schrieb Graydon graydon@xxxxxxxxx
<xsl-list-service@xxxxxxxxxxxxxxxxxxxxxx>:
>
> On Fri, Nov 27, 2020 at 07:31:14PM -0000, Michael Müller-Hillebrand
> mmh@xxxxxxxxx scripsit:
>> Before I start digging deep, I would hope that someone used these
>> features to create a function which creates a ZIP archive from a
>> list of root elements and collects all contained files
>> automatically. My XSLT creates a lot of files and folders using
>> xsl:result-document and at the end I would like to zip this up.
>
> There's an EXPath archive module, http://expath.org/spec/archive
>
> Getting the contents sounds like a use for
http://expath.org/spec/file#fn.list
>
> So not quite a single function, but close.

Current Thread