Subject: Re: SGML to HTML with jade? From: Mark Burton <markb@xxxxxxxxxx> Date: Thu, 26 Jun 1997 13:26:41 +0100 |
Thanks to everyone who has responded to this thread. Please read on if you can face it! jjc@xxxxxxxxxx said: > I would suggest instead: > - building a list of entries: each entry is a pair of a sort-key and a > sosofo; the sort-key is a list of integers (or maybe a list of list of > integers). Experimentation shows that converting strings to lists of numbers is not always a winner. If the index strings are reasonably long and also don't have a common prefix it takes longer to convert the whole index string to a list of numbers than to compare the first few characters a number of times. The better the sorting algorithm used (fewer comparisons) the less likely it is that it is worth converting the string to a list of numbers. > - sorting the list of entries using a merge sort. This I now do. Wake up, here comes the interesting bit! Further experimentation shows that most of the time isn't going into the sort anyway. Here are three versions of the routine that builds the index with the time taken for a fairly large document to be processed. This is the real routine, the total time taken is about 80 seconds (define (build-index nl) (let loop ((result '()) (nl nl)) (if (node-list-empty? nl) result (loop (cons (make-index-entry (node-list-first nl)) result) (node-list-rest nl))))) The next version still traverses the node list but returns an empty list of index entries. The total time taken is about 60 seconds. The difference being the time taken to build the list entries, sort and output the list. (not a huge difference) (define (build-index nl) (let loop ((result '()) (nl nl)) (if (node-list-empty? nl) result (loop '() (node-list-rest nl))))) This version doesn't bother to traverse the node list at all! It takes about 10 seconds. (A huge difference) (define (build-index nl) '()) Am I being incredibly thick or is Jade taking a long time to execute node-list-empty? and/or node-list-rest ? I enclose the current effort. regards, Mark
<!--doctype style-sheet PUBLIC "-//James Clark//DTD DSSSL Style Sheet//EN"--> ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; dbtohtml.dsl - DSSSL style sheet for DocBook to HTML conversion (jadeware) ; ; Author : Mark Burton (markb@xxxxxxxxxx) ; Created On : Fri Jun 13 18:21:14 1997 ; Last Modified By: Mark Burton ; Last Modified On: Thu Jun 26 13:21:22 1997 ; ; $Id: dbtohtml.dsl,v 1.4 1997/06/25 22:39:00 markb Exp $ ; ; Usage: ; ; jade -d dbtohtml.dsl -t sgml yourdoc.sgm ; ; Additional command line options: ; ; -V %no-split-output% sends all the output to one file ; -V %no-make-index% disables (horribly slow) index creation ; -V %no-make-toc% disables TOC creation ; -V %no-shade-screen% disables grey background to SCREEN regions ; -V %show-comments% includes contents of COMMENT regions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; declare non-standard functions (declare-flow-object-class element "UNREGISTERED::James Clark//Flow Object Class::element") (declare-flow-object-class empty-element "UNREGISTERED::James Clark//Flow Object Class::empty-element") (declare-flow-object-class document-type "UNREGISTERED::James Clark//Flow Object Class::document-type") (declare-flow-object-class processing-instruction "UNREGISTERED::James Clark//Flow Object Class::processing-instruction") (declare-flow-object-class entity "UNREGISTERED::James Clark//Flow Object Class::entity") (declare-flow-object-class entity-ref "UNREGISTERED::James Clark//Flow Object Class::entity-ref") (declare-flow-object-class formatting-instruction "UNREGISTERED::James Clark//Flow Object Class::formatting-instruction") (declare-characteristic preserve-sdata? "UNREGISTERED::James Clark//Characteristic::preserve-sdata?" #f) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; variables (define %no-split-output% #f) (define %no-make-toc% #f) (define %no-make-index% #f) (define %no-shade-screen% #f) (define %show-comments% #f) (define %html-public-id% "-//W3C//DTD HTML 3.2 Final//EN") (define %output-basename% "DBTOHTML") (define %output-suffix% ".html") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; top-level sections (element BOOK (cond (%no-split-output% ; everything goes in one file (make-file (string-append %output-basename% %output-suffix%) (make sequence (process-children) (cond ((not %no-make-index%) (make sequence (make-fat-rule) (make-index))) (#t (empty-sosofo)))))) (#t ; split output into separate files (make sequence (make-file (string-append %output-basename% %output-suffix%) (make sequence (process-first-descendant "TITLE") (process-first-descendant "BOOKINFO"))) (process-matching-children "PREFACE" "CHAPTER" "APPENDIX") (cond ((not %no-make-index%) (make-file (string-append %output-basename% "-INDEX" %output-suffix%) (make-index))) (#t (empty-sosofo))))))) (define (make-file file-name content) (make entity system-id: file-name (make sequence (make document-type name: "html" public-id: %html-public-id%) (make element gi: "HTML" (make sequence (make element gi: "HEAD" (make element gi: "TITLE" (with-mode extract-title-text (process-first-descendant "TITLE")))) (make element gi: "BODY" (make sequence (make-anchor) content ; (make-footer) ))))))) (define (make-footer) (let ((corpauthors (select-elements (subtree (ancestor "BOOK")) "CORPAUTHOR"))) (cond ((not (node-list-empty? corpauthors)) (make element gi: "P" (make element gi: "B" (make sequence (make entity-ref name: "copy") (process-node-list corpauthors))))) (#t (empty-sosofo))))) (define (make-pref-chap-app) (cond (%no-split-output% (make sequence (make-anchor) (make-fat-rule) (process-children))) (#t (make-file (link-file-name (current-node)) (process-children))))) (element PREFACE (make-pref-chap-app)) (element CHAPTER (make-pref-chap-app)) (element APPENDIX (make-pref-chap-app)) (element BEGINPAGE (make empty-element gi: "HR")) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; sections (element SECT1 (make sequence (make-anchor) (process-children))) (element SECT2 (make sequence (make-anchor) (process-children))) (element SECT3 (make sequence (make-anchor) (process-children))) (element SECT4 (make sequence (make-anchor) (process-children))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; titles (mode extract-title-text (element (TITLE) (process-children))) (element (BOOK TITLE) (make sequence (make element gi: "CENTER" (make element gi: "H1" (process-children))))) (element (CHAPTER TITLE) (make element gi: "H1" (make sequence (literal (chap-app-head-label "Chapter")) (process-children-trim)))) (element (APPENDIX TITLE) (make element gi: "H1" (make sequence (literal (chap-app-head-label "Appendix")) (process-children-trim)))) (element (SECT1 TITLE) (make element gi: "H2")) (element (SECT2 TITLE) (make element gi: "H3")) (element (SECT3 TITLE) (make element gi: "H4")) (element (SECT4 TITLE) (make element gi: "H5")) (element (FORMALPARA TITLE) (make element gi: "H5")) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; font changers (element EMPHASIS (make element gi: "I")) (element TYPE (make element gi: "B" (make element gi: "TT"))) (element TOKEN (make element gi: "I" (make element gi: "B" (make element gi: "TT")))) (element REPLACEABLE (make element gi: "I")) (element FIRSTTERM (make element gi: "I")) (element APPLICATION (make element gi: "TT")) (element FILENAME (make element gi: "TT")) (element LITERAL (make element gi: "TT")) (element ENVAR (make element gi: "TT")) (element SUBSCRIPT (make element gi: "SUB")) (element SUPERSCRIPT (make element gi: "SUP")) (element CITETITLE (make element gi: "I")) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; paragraph like things (element CAUTION (make-special-para)) (element IMPORTANT (make-special-para)) (element WARNING (make-special-para)) (element NOTE (make-special-para)) (element TIP (make-special-para)) (element COMMENT (cond (%show-comments% (make element gi: "FONT" attributes: '(("color" "red")) (make-special-para))) (#t (empty-sosofo)))) (element PARA (make sequence (make empty-element gi: "P") (with-mode footnote-ref (process-children)) (with-mode footnote-def (process-matching-children "FOOTNOTE")))) (element BLOCKQUOTE (make element gi: "BLOCKQUOTE")) (element SCREEN (let ((gubbins (make element gi: "PRE" (process-children)))) (make sequence (make empty-element gi: "P") (if %no-shade-screen% gubbins (make element gi: "TABLE" attributes: '(("border" "0") ("bgcolor" "#E0E0E0") ("width" "100%")) (make element gi: "TR" (make element gi: "TD" gubbins))))))) (element FORMALPARA (process-children)) (element PHRASE (maybe-bold-children)) (mode footnote-ref (element FOOTNOTE (make sequence (literal "[") (literal (format-number (element-number (current-node)) "1")) (literal "]")))) (mode footnote-def (element FOOTNOTE (make element gi: "BLOCKQUOTE" (make sequence (literal "[") (literal (format-number (element-number (current-node)) "1")) (literal "]") (process-children))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; lists (element ITEMIZEDLIST (make sequence (make empty-element gi: "P") (make element gi: "UL"))) (element ORDEREDLIST (make sequence (make empty-element gi: "P") (make element gi: "OL"))) (element (ITEMIZEDLIST LISTITEM) (make sequence (make empty-element gi: "LI") (process-children) (make empty-element gi: "P"))) (element (ORDEREDLIST LISTITEM) (make sequence (make empty-element gi: "LI") (process-children) (make empty-element gi: "P"))) (element VARIABLELIST (make sequence (make empty-element gi: "P") (make element gi: "DL"))) (element VARLISTENTRY (make sequence (make empty-element gi: "DT") (process-children))) (element (VARLISTENTRY LISTITEM) (make sequence (make empty-element gi: "DD") (process-children) (make empty-element gi: "P"))) (element TERM (maybe-bold-children)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; index (define (index-entry-name indexterm) (string-append "index." (format-number (element-number indexterm) "1"))) (element INDEXTERM (make sequence (make element gi: "A" attributes: (list (list "name" (index-entry-name (current-node)))) (literal "")) (empty-sosofo))) ; DIY string-ci>? (define (string-ci>? s1 s2) (let ((len1 (string-length s1)) (len2 (string-length s2))) (let loop ((i 0)) (cond ((= i len1) #f) ((= i len2) #t) (#t (let ((c1 (index-char-val (string-ref s1 i))) (c2 (index-char-val (string-ref s2 i)))) (cond ((= c1 c2) (loop (+ i 1))) (#t (> c1 c2))))))))) (define (index-char-val ch) (case ch ((#\A #\a) 65) ((#\B #\b) 66) ((#\C #\c) 67) ((#\D #\d) 68) ((#\E #\e) 69) ((#\F #\f) 70) ((#\G #\g) 71) ((#\H #\h) 72) ((#\I #\i) 73) ((#\J #\j) 74) ((#\K #\k) 75) ((#\L #\l) 76) ((#\M #\m) 77) ((#\N #\n) 78) ((#\O #\o) 79) ((#\P #\p) 80) ((#\Q #\q) 81) ((#\R #\r) 82) ((#\S #\s) 83) ((#\T #\t) 84) ((#\U #\u) 85) ((#\V #\v) 86) ((#\W #\w) 87) ((#\X #\x) 88) ((#\Y #\y) 89) ((#\Z #\z) 90) ((#\ ) 32) ((#\0) 48) ((#\1) 49) ((#\2) 50) ((#\3) 51) ((#\4) 52) ((#\5) 53) ((#\6) 54) ((#\7) 55) ((#\8) 56) ((#\9) 57) ; laziness precludes me from filling this out further (else 0))) (define (string->number-list s) (let loop ((i (- (string-length s) 1)) (l '())) (if (< i 0) l (loop (- i 1) (cons (index-char-val (string-ref s i)) l))))) (define (number-list>? l1 l2) (cond ((null? l1) #f) ((null? l2) #t) ((= (car l1) (car l2)) (number-list>? (cdr l1) (cdr l2))) (#t (> (car l1) (car l2))))) ; return the string data for a given index entry (define (get-index-entry-data entry) (let ((primary (select-elements (descendants entry) "PRIMARY")) (secondary (select-elements (descendants entry) "SECONDARY"))) (if (node-list-empty? secondary) (data primary) (string-append (data primary) " - " (data secondary))))) (define (make-index-entry entry) (let ((text (get-index-entry-data entry))) (cons text (make sequence (make empty-element gi: "LI") (make element gi: "A" attributes: (list (list "href" (string-append (link-file-name entry) "#" (index-entry-name entry)))) (literal text)))))) (define (build-index nl) (let loop ((result '()) (nl nl)) (if (node-list-empty? nl) result (loop (cons (make-index-entry (node-list-first nl)) result) (node-list-rest nl))))) (define (sort-index il) (letrec ((list-head (lambda (l n) (if (> n 0) (cons (car l) (list-head (cdr l) (- n 1))) '()))) (merge (lambda (il1 il2) (cond ((null? il1) il2) ((null? il2) il1) ((string-ci>? (car (car il1)) (car (car il2))) (cons (car il2) (merge il1 (cdr il2)))) (#t (cons (car il1) (merge (cdr il1) il2))))))) (let* ((ll (length il)) (ldiv2 (quotient ll 2))) (if (> 2 ll) il (merge (sort-index (list-head il ldiv2)) (sort-index (list-tail il ldiv2))))))) (define (output-index il) (let extract-and-append ((il il) (result (empty-sosofo))) (if (null? il) result (extract-and-append (cdr il) (sosofo-append result (cdr (car il))))))) (define (make-index) (make sequence (make element gi: "A" attributes: (list (list "name" "INDEXTOP")) (literal "")) (make element gi: "H1" (literal "Index")) (make element gi: "UL" (output-index (sort-index (build-index (select-elements (subtree (current-node)) "INDEXTERM"))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; links & cross-references (define (link-file-name target) (cond (%no-split-output% "") (#t (string-append %output-basename% "-" (cond ((equal? (gi target) "CHAPTER") (format-number (child-number target) "1")) ((ancestor-child-number "CHAPTER" target) (format-number (ancestor-child-number "CHAPTER" target) "1")) ((equal? (gi target) "APPENDIX") (format-number (child-number target) "A")) ((ancestor-child-number "APPENDIX" target) (format-number (ancestor-child-number "APPENDIX" target) "A")) (#t "")) %output-suffix%)))) (element LINK (let* ((target (element-with-id (attribute-string "linkend") (ancestor "BOOK"))) (target-file-name (link-file-name target))) (make element gi: "A" attributes: (list (list "href" (string-append target-file-name "#" (attribute-string "linkend"))))))) (element ULINK (make element gi: "A" attributes: (list (list "href" (attribute-string "url"))))) (element XREF (let* ((target (element-with-id (attribute-string "linkend") (ancestor "BOOK"))) (target-file-name (link-file-name target))) (make element gi: "A" attributes: (list (list "href" (string-append target-file-name "#" (attribute-string "linkend")))) (with-mode extract-xref-text (process-node-list target))))) (mode extract-xref-text (default (let ((title-sosofo (with-mode extract-title-text (process-first-descendant "TITLE")))) (if (sosofo? title-sosofo) title-sosofo (literal (string-append "Reference to " (gi))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; figures (element FIGURE (make sequence (make empty-element gi: "P") (make-anchor) (process-children) (make empty-element gi: "P"))) (element (FIGURE TITLE) (make sequence (make element gi: "B") (make empty-element gi: "P"))) (element GRAPHIC (let ((img (make sequence (make empty-element gi: "P") (make empty-element gi: "IMG" attributes: (list (list "src" (attribute-string "fileref"))))))) (if (equal? (attribute-string "align") "CENTER") (make element gi: "CENTER" img) img))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; tables (element INFORMALTABLE (make sequence (make empty-element gi: "P") (make element gi: "TABLE" attributes: (if (equal? (attribute-string "frame") "ALL") '(("border" "2") ("cellpadding" "2")) '())) (make empty-element gi: "P"))) (element TGROUP (process-children)) (element THEAD (process-children)) (element (THEAD ROW) (make sequence (make empty-element gi: "TR") (process-children))) (element (THEAD ROW ENTRY) (make sequence (make empty-element gi: "TD") (make element gi: "B" (process-children)))) (element TBODY (process-children)) (element (TBODY ROW) (make sequence (make empty-element gi: "TR") (process-children))) (element (TBODY ROW ENTRY) (make sequence (make empty-element gi: "TD") (process-children))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; book info (element BOOKINFO (make sequence (make element gi: "CENTER" (process-children)) (cond ((not %no-make-toc%) (make sequence (make-fat-rule) (make element gi: "H2" (literal "Contents")) (make element gi: "ul" (with-mode make-toc-links (process-node-list (ancestor "BOOK")))))) (#t (empty-sosofo))))) (element AUTHORGROUP (make element gi: "B" (process-children))) (element COPYRIGHT (make sequence (make entity-ref name: "copy") (process-matching-children "HOLDER") (process-matching-children "YEAR"))) (element HOLDER (make sequence (literal " ") (process-children-trim))) (element YEAR (make sequence (literal " ") (process-children-trim))) (element CORPAUTHOR (make sequence (make empty-element gi: "P") (process-children-trim))) (element AUTHOR (make sequence (make empty-element gi: "P") (process-children-trim))) (element HONORIFIC (make sequence (process-children-trim) (literal " "))) (element FIRSTNAME (make sequence (process-children-trim) (literal " "))) (element OTHERNAME (make sequence (process-children-trim) (literal " "))) (element SURNAME (make sequence (process-children-trim) (literal " "))) (element LINEAGE (make sequence (process-children-trim) (literal " "))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; TOC (element LOF (empty-sosofo)) (element LOT (empty-sosofo)) (element TOC (empty-sosofo)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; DIY TOC (mode make-toc-links (element (BOOK) (sosofo-append (process-children) (cond ((not %no-make-index%) (make sequence (make empty-element gi: "LI") (make element gi: "A" attributes: (list (list "href" (cond (%no-split-output% "#INDEXTOP") (#t (string-append %output-basename% "-INDEX" %output-suffix% "#INDEXTOP"))))) (literal "Index")))) (#t (empty-sosofo))))) (element (CHAPTER) (make-chap-or-app-toc-links)) (element (APPENDIX) (make-chap-or-app-toc-links)) (element (SECT1) (make sequence (make empty-element gi: "LI") (let ((title-text (with-mode extract-title-text (process-first-descendant "TITLE")))) (if (id) (make element gi: "A" attributes: (list (list "href" (string-append (link-file-name (current-node)) "#" (id)))) title-text) title-text)))) (default (empty-sosofo))) (define (make-chap-or-app-toc-links) (make sequence (make empty-element gi: "LI") (let ((title-text (make sequence (literal (if (equal? (gi) "CHAPTER") (string-append "Chapter " (format-number (element-number (current-node)) "1") " - ") (string-append "Appendix " (format-number (element-number (current-node)) "A") " - "))) (with-mode extract-title-text (process-first-descendant "TITLE"))))) (if (id) (make element gi: "A" attributes: (list (list "href" (string-append (link-file-name (current-node)) "#" (id)))) title-text) title-text)) (make element gi: "UL" (with-mode make-toc-links (process-matching-children "SECT1"))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; make the unimplemented bits stand out (default (make element gi: "FONT" attributes: '(("color" "red")))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; standard subroutines ;(define (node-list-reduce nl combine init) ; (if (node-list-empty? nl) ; init ; (node-list-reduce (node-list-rest nl) ; combine ; (combine init (node-list-first nl))))) (define (subtree nl) (node-list-map (lambda (snl) (node-list snl (subtree (children snl)))) nl)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; various homebrew subroutines (define (make-fat-rule) (make empty-element gi: "HR" attributes: '(("size" "5")))) (define (chap-app-head-label chap-or-app) (let ((label (attribute-string "label" (ancestor chap-or-app)))) (string-append chap-or-app " " (if label (if (equal? label "auto") (format-number (element-number (ancestor chap-or-app)) (if (equal? chap-or-app "Chapter") "1" "A")) label) (format-number (element-number (ancestor chap-or-app)) (if (equal? chap-or-app "Chapter") "1" "A"))) ". "))) (define (make-anchor) (if (id) (make element gi: "A" attributes: (list (list "name" (id))) (literal "")) (empty-sosofo))) (define (make-special-para) (make sequence (make empty-element gi: "P") (make element gi: "B" (literal (string-append (gi) ":"))) (make element gi: "BLOCKQUOTE" (process-children)))) (define (maybe-bold-children) (cond ((equal? (attribute-string "role") "bold") (make element gi: "B" (process-children-trim))) (#t (process-children-trim)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; the end
Current Thread |
---|
|
<- Previous | Index | Next -> |
---|---|---|
Re: SGML to HTML with jade?, James Clark | Thread | Re: SGML to HTML with jade?, James Clark |
Re: More thoughts on multiple style, James Clark | Date | Re: SGML to HTML with jade?, James Clark |
Month |