root/trunk/freewrt/scripts/roff2htm

Revision 3741, 8.6 kB (checked in by tg, 8 months ago)

more mann

  • Property svn:keywords set to Id
Line 
1 # $FreeWRT$
2 # $MirOS: src/scripts/roff2htm,v 1.55 2007/10/23 11:09:38 tg Exp $
3 # $ekkoBSD: catman2html.sh,v 1.2 2004/03/07 03:02:53 stephen Exp $
4 #-
5 # Copyright (c) 2004, 2005, 2006, 2007
6 #       Thorsten “mirabilos” Glaser <tg@mirbsd.de>
7 # Original version for ekkoBSD by:
8 # Copyright (c) 2004
9 #       Stephen Paskaluk <sap@mirbsd.org>
10 # Parts of the regular expression set below are based upon work by:
11 # Copyright (c) 1995
12 #       Panagiotis J. Christias <christia@theseas.ntua.gr>
13 #
14 # Provided that these terms and disclaimer and all copyright notices
15 # are retained or reproduced in an accompanying document, permission
16 # is granted to deal in this work without restriction, including un-
17 # limited rights to use, publicly perform, distribute, sell, modify,
18 # merge, give away, or sublicence.
19 #
20 # Advertising materials mentioning features or use of this work must
21 # display the following acknowledgement:
22 #       This product includes material provided by Thorsten Glaser.
23 #
24 # This work is provided “AS IS” and WITHOUT WARRANTY of any kind, to
25 # the utmost extent permitted by applicable law, neither express nor
26 # implied; without malicious intent or gross negligence. In no event
27 # may a licensor, author or contributor be held liable for indirect,
28 # direct, other damage, loss, or other issues arising in any way out
29 # of dealing in the work, even if advised of the possibility of such
30 # damage or existence of a defect, except proven that it results out
31 # of said person's immediate fault when using the work as intended.
32 #-
33 # Routines for converting catman pages and nrcon(1)d papers to HTML.
34 # ATTENTION: this file contains embedded white-, backspace and high-
35 #            bit-on control characters! Use “jupp --asis $0” to edit
36 # Note: this file contains magic and can’t be edited as UTF-8 either.
37 # Note: this script assumes MirBSD filesystem internals: ino_t=uint32_t
38
39 # check if mksh R31:2007/10/18 or up
# Sets the scratch flag i to 0 when the running shell is acceptable and to
# 1 otherwise; the flag is evaluated and then unset right below.
#  - first test: $KSH_VERSION names MIRBSD KSH with a release number of
#    32..39, 40..99 or 100..999 followed by a y/m/d date
#  - second test: release R31; the date is cut out of $KSH_VERSION with sed
#    and eval'd into y, m and d, then compared against 2007/10/18
#  - anything else (including a shell that is not mksh at all) is rejected
# NOTE(review): this comment accepts R31 from 2007/10/18 on, while the error
# message below asks for R32 -- confirm which one is intended.
40 if [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R)@(3[2-9]|[4-9][0-9]|[1-9][0-9][0-9])\ +([0-9])/+([0-9])/+([0-9])?(\ *) ]]; then
41         i=0
42 elif [[ $KSH_VERSION = @(\@\(#\)MIRBSD KSH R31)* ]]; then
43         eval $(print "$KSH_VERSION" | sed 's#^.*R31 \([0-9]*\)/\([0-9]*\)/\([0-9]*\)\( .*\)*$#y=\1 m=\2 d=\3#')
44         (( i = y < 2007 ? 1 :
45             y > 2007 ? 0 :
46             m < 10 ? 1 :
47             m > 10 ? 0 :
48             d < 18 ? 1 : 0 ))
49         unset y m d
50 else
51         i=1
52 fi
53 # we need an mksh version with uint32_t array indices
# (convert_page uses inode numbers as subscripts of roff2htm_inodecache)
54 if (( i )); then
55         print -u2 Error: your mksh is not recent enough.
56         print -u2 Please upgrade to at least mksh R32.
57         exit 1
58 fi
59 unset i
60
61 # initialise globals
# roff2htm_gendate     timestamp that output_footer prints on every page
# roff2htm_inodecache  array indexed by source inode number; convert_page
#                      stores the first target file written for that inode
# roff2htm_statarg     options handed to stat(1) in front of the %i format
62 roff2htm_gendate=$(date +"%F %T")               # current time
63 set -A roff2htm_inodecache                      # inode cache (empty)
64 if stat --help >/dev/null 2>&1; then
65         roff2htm_statarg=-Lc                    # GNU stat (coreutils)
66 else
67         roff2htm_statarg=-Lf                    # BSD stat (base system)
68 fi
69 unset LANGUAGE                                  # GNU locale (LC_MESSAGES)
70 export LC_ALL=C                                 # SUSv3 locale (override)
71
# do_convert [section]
# Filter: reads a formatted (cat) manual page on standard input and writes
# the HTML body fragment to standard output.
#   $1  section of the page being converted (defaults to 0); the last sed
#       expression strips "../man<section>/" from generated hrefs, so links
#       into the page's own section become relative to its directory
# Stages, in order:
#   1. col -x, then removal of trailing whitespace
#   2. two identical expressions that pull a name(section) reference which
#      was hyphen-split across two lines back onto the first line
#   3. y### maps &, < and > to placeholder bytes; they are emitted as the
#      entities &#38; &#60; &#62; near the end of the script
#   4. lines made up of capitals become <h2> (column 0) or <h3> (two-space
#      indent) headings, each closing and reopening the surrounding <pre>
#   5. <i>/<b> markup is generated and adjacent tags are merged; presumably
#      this matches nroff overstrike sequences (underscore-backspace-char
#      and char-backspace-char) -- TODO confirm against the repository copy
#   6. on lines starting with three spaces, name(section) references become
#      <a href="../man<section>/<name>.htm"> links
#   7. the first line gains <pre>, the last line </pre>, empty pairs vanish
#   8. the while loop prints non-empty lines verbatim and collapses every
#      run of empty lines into one (ws counts the empty lines seen in a row)
# NOTE(review): the file header warns about embedded whitespace, backspace
# and high-bit control characters. In this listing they are either missing
# or shown as U+FFFD, so do not edit the expressions from this copy.
72 function do_convert {
73         typeset -i ws=0
74         col -x | sed -e 's/[     ]*$//g'                                \
75             -e '/-$/N
76 {
77 s/\([0-9A-z][-.,0-9A-z]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z]*([1-9n][A-z]*)\)\([^ ]*\) /\1\3\4\
78 \2/
79 }'                                                                      \
80             -e '/-$/N
81 {
82 s/\([0-9A-z][-.,0-9A-z]*\)-\n\(  *\)\([0-9A-z][-.,0-9A-z]*([1-9n][A-z]*)\)\([^ ]*\) /\1\3\4\
83 \2/
84 }'                                                                      \
85             -e 'y#&<>#��#'                                              \
86                                                                         \
87             -e '/^[A-Z]/s#.##g'                                       \
88             -e 's#^[A-Z][ ,A-Z0-9]*$#</pre><h2>&</h2><pre>#'            \
89             -e 's#^  \([A-Z][ ,A-Z0-9]*\)$#</pre><h3>\1</h3><pre>#'    \
90                                                                         \
91             -e 's#_\([^�-�][�-�]*\)#<i>\1</i>#g'                       \
92             -e 's#[^�-�][�-�]*\([^�-�][�-�]*\)#<b>\1</b>#g'            \
93                                                                         \
94             -e 's#</\([bi]\)><\1>##g'                                   \
95             -e 's#</b><b>[^�-�][�-�]*##g'                              \
96             -e 's#</b>[^�-�][�-�]*<b>##g'                              \
97             -e 's#[^�-�][�-�]*##g'                                     \
98             -e 's#_</i<b><</b>i>##g'                                    \
99                                                                         \
100             -e 's#^\( \{2,3\}\)\([A-Z][ ,0-9A-z]*\)$#\1<b>\2</b>#'      \
101                                                                         \
102             -e '/^   /s#\(\([0-9A-z][-.,0-9A-z]*\)(\([1-9n]\)[/0-9A-Za-z]*)\)#<a href=\"../man\3/\2.htm\">\1</a>#g' \
103             -e '/^   /s#\(<i>\([0-9A-z][-.,0-9A-z]*\)</i>(\([1-9n]\)[/0-9A-Za-z]*)\)#<a href=\"../man\3/\2.htm\">\1</a>#g' \
104             -e '/^   /s#\(\([0-9A-z][-.,0-9A-z]*\)(\([PSU][MS][DM]\))\)#<a href=\"../man\3/\2.htm\">\1</a>#g' \
105             -e '/^   /s#\(<i>\([0-9A-z][-.,0-9A-z]*\)</i>(\([PSU][MS][DM]\))\)#<a href=\"../man\3/\2.htm\">\1</a>#g' \
106             -e '/^   /s#\(\([0-9A-z][-.,0-9A-z]*\)(\(PAPERS\))\)#<a href=\"../man\3/\2.htm\">\1</a>#g' \
107             -e '/^   /s#\(<i>\([0-9A-z][-.,0-9A-z]*\)</i>(\(PAPERS\))\)#<a href=\"../man\3/\2.htm\">\1</a>#g' \
108                                                                         \
109             -e 's#<b>+</b>\( *\)<b>o</b># \1•#'                         \
110             -e 's#<b>+</b>#•#'                                          \
111             -e 's#</\([bi]\)><\1>##g'                                   \
112             -e 's#</\([bi]\)>\([[:punct:][:space:]]*\)<\1>#\2#g'        \
113             -e 's#\([^[:punct:]]\)\([-!"#$%&'\''()*+,./:;=?@[\]^_`{|}~]*\)\(<[bi]>\)#\1\3\2#g'  \
114             -e 's#\(<i>[fh]t*p:[^<]*\)</i>/#\1/</i>#g'                  \
115             -e 's#\(<i>/[^<]*\)</i>/#\1/</i>#g'                         \
116             -e 's#<h3>*<b>*>#<h3>#g' -e 's#</b></h3>#</h3>#g'           \
117                                                                         \
118             -e 's/�\&#38;/g'                                            \
119             -e 's/�/\&#60;/g'                                           \
120             -e 's/�/\&#62;/g'                                           \
121                                                                         \
122             -e '1s#^#<pre>#'                                            \
123             -e '$s#$#</pre>#'                                           \
124             -e 's#<pre></pre>##g'                                       \
125             -e 's#</pre><pre>##g'                                       \
126             -e 's#<a href="../man'${1:-0}'/#<a href="#g'                \
127         | while IFS= read -r line; do
128                 if [[ -n $line ]]; then
129                         print -r -- "$line"
130                         ws=0
131                 else
132                         (( !ws++ )) && print
133                 fi
134         done
135 }
136
# output_header pagename section
# Prints the top of an XHTML 1.1 document to standard output: XML
# declaration, doctype, <head> with the title "RTFM pagename(section)", a
# navigation paragraph linking the index, the package list and every section
# directory, and an <h1> that links to ../man<section>/<pagename>.htm.
#   $1  manual page name
#   $2  section
# Both arguments are expanded outside the single-quoted text, unquoted and
# without HTML escaping; the parentheses are backslash-escaped or quoted so
# that they reach print literally.
137 function output_header {
138         print '<?xml version="1.0" encoding="utf-8" ?>
139 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
140  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
141 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"><head>
142  <meta http-equiv="content-type" content="text/html; charset=utf-8" />
143  <title>RTFM '$1\($2')</title>
144  <meta name="robots" content="index, follow" />
145 </head><body>
146 <p>• <a href="../">Index</a> or by <a href="../pkgs/">package</a> or section
147  <a href="../man1/">1</a> <a href="../man2/">2</a> <a href="../man3/">3</a>
148  <a href="../man3p/">3p</a> <a href="../man4/">4</a> <a href="../man5/">5</a>
149  <a href="../man6/">6</a> <a href="../man7/">7</a> <a href="../man8/">8</a>
150  <a href="../man9/">9</a> <a href="../mann/">n</a>
151  • <a href="https://www.freewrt.org/trac/">Homepage</a> •</p><hr />
152 <h1>FreeWRT Manual: <a href="../man'$2/$1'.htm">'$1\($2')</a></h1>'
153 }
154
# output_footer
# Prints the bottom of the XHTML document to standard output: a horizontal
# rule, the generation timestamp taken from the global $roff2htm_gendate,
# the copyright and acknowledgement paragraphs, and the closing </body> and
# </html> tags. Takes no arguments.
# The "$Id$" text sits inside single quotes, so the shell does not expand
# it; presumably Subversion fills it in through the svn:keywords property
# on this file -- TODO confirm.
155 function output_footer {
156         print '<hr /><p style="font-size:xx-small;">Generated on' \
157             $roff2htm_gendate 'by
158  <tt>$Id$</tt></p>
159 <p>These manual pages are <a href="http://www.freewrt.org/licence">copyrighted</a>
160  by their respective writers; their source is available at our <a
161  href="http://www.freewrt.org/downloads/">download area</a> and other mirrors.
162  The rest is Copyright © 2006-2007 <a href="https://www.freewrt.org/">The
163  FreeWRT/OpenADK Project</a>, Germany. <br /><i style="font-size:3pt;">
164  This product includes material provided by Thorsten Glaser. The manpage→HTML
165  converter is from ekkoBSD and <a href="http://mirbsd.de/">MirBSD</a>.</i></p>
166 <p style="font-size:x-small;">This manual page’s HTML representation
167  is supposed to be <a href="http://validator.w3.org/check/referer">valid
168  XHTML/1.1</a>; if not, please send a bug report – diffs preferred.</p>
169 </body></html>'
170 }
171
# do_conversion [pagename [section]]
# Converts one formatted manual page from standard input into a complete
# XHTML document on standard output by running output_header, do_convert
# and output_footer in that order.
#   $1  page name shown in title and heading (default: missing-pagename)
#   $2  section, passed to both the header and the converter (default: 0)
172 function do_conversion {
173         output_header ${1:-missing-pagename} ${2:-0}
174         do_convert ${2:-0}
175         output_footer
176 }
177
178 # do_conversion_verbose title section infile outfile
# Like do_conversion, but reads the page from infile ($3), writes the
# document to outfile ($4) and reports progress on standard error: first
# "infile → outfile" without a newline, then the newline once the
# conversion has returned.
# NOTE(review): $3 and $4 are expanded unquoted, so file names containing
# whitespace or glob characters may not survive intact.
179 function do_conversion_verbose {
180         print -nru2 -- $3 → $4
181         do_conversion $1 $2 <$3 >$4
182         print -u2
183 }
184
185 # convert_page /path/to/man.cat1 /targetpath
# Converts a single formatted page, or hard-links it when the same source
# inode has been converted before.
#   $1  source file; the code expects .../cat<section>[/<subdir>]/<page>.0
#   $2  target base directory; output goes to $2/man<section>/<page>.htm
# Reads $roff2htm_statarg and reads and writes roff2htm_inodecache.
# Source files sharing an inode (hard links, or symlinks, since stat is run
# with -L) produce one HTML file with several names; each additional name
# is appended to that file's <title> and <h1> with ed(1).
186 function convert_page {
187         typeset fn=$1 page sect tn
           # inode number of the source; stat errors are discarded, and the
           # (( ino )) test below treats a zero value as "unknown"
188         typeset -Uui ino=$(stat $roff2htm_statarg %i $fn 2>/dev/null)
189         page=${fn##*/}                  # basename
190         page=${page%.0}                 # manual page name
191         sect=${fn%/*}                   # dirname
192         sect=${sect##*/cat}             # archsection
193         sect=${sect%%/*}                # section
194         tn=man${sect}/${page}.htm       # target file
195
196         if (( ino )) && [[ -n ${roff2htm_inodecache[ino]} ]]; then
197                 # source file is linked to a file we know
198                 print -ru2 -- $tn ← ${roff2htm_inodecache[ino]}
199                 ln -f $2/${roff2htm_inodecache[ino]} $2/$tn
200                 # patch in the additional name(s)
                    # NOTE(review): <<- strips leading tabs only; this listing
                    # shows spaces, so verify that the real file indents the
                    # here-document and its EOF line with tabs.
201                 ed -s $2/$tn <<-EOF
202                         /<title>/s#</title>#, $page($sect)&#
203                         /<h1>/s#</h1>#, <a href="../$tn">$page($sect)</a>&#
204                         wq
205                 EOF
206         else
207                 # store target filename in the inode cache
                    # (this also runs with ino == 0, which then uses slot 0)
208                 roff2htm_inodecache[ino]=$tn
209                 do_conversion_verbose $page $sect $fn $2/$tn
210         fi
211 }
212
213 # output_htaccess >…/.htaccess
# Prints two web server configuration directives to standard output: a
# DirectoryIndex pointing at /dev/null and an AddType that maps the "htm"
# extension to text/html with charset utf-8. The caller redirects the
# output into the .htaccess file of a section directory.
214 function output_htaccess {
215         print DirectoryIndex /dev/null
216         print "AddType 'text/html; charset=utf-8' htm"
217 }
218
219 # convert_all /path/to/share/man /targetpath
# Converts every *.0 file found below the cat1..cat9, catn and cat3p
# directories of a manual tree.
#   $1  root of the manual tree (default: /usr/share/man)
#   $2  target base directory (default: $(pwd)/mbsdman)
# The find|sort pipeline is started as a co-process (|&) and its output is
# consumed by the "read -p" loop at the end; in between, one man<section>
# directory with a .htaccess file is created per section.
220 function convert_all {
221         typeset tp=${2:-$(pwd)/mbsdman}         # target basepath
222         typeset x f
223
            # find runs with standard error closed (2>&-); sort -f folds case
224         (find ${1:-/usr/share/man}/cat{[1-9n],3p} -name \*.0 2>&- | sort -f) |&
225         for x in 1 2 3 3p 4 5 6 7 8 9 n; do
226                 mkdir -p $tp/man$x      # one per section
227                 output_htaccess >$tp/man$x/.htaccess
228         done
            # NOTE(review): read is used without -r and with the default IFS
229         while read -p f; do
230                 convert_page $f $tp     # any subpages
231         done
232 }
Note: See TracBrowser for help on using the browser.