# newbiedoc.sed
# Creates a modified local version of a page from the NewbieDOC wiki.
# MediaWiki 1.11.0
#
# Copyright 2007 Chris Lale.
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


# Download pages from the NewbieDOC site using
# wget -p --convert-links -i newbiedoc-urls.txt
# Then run this script on each downloaded page.

# Remove MediaWiki head scripts: delete every line from the
# "<!-- Head Scripts -->" marker through the first following line that
# references skins/common/ajax.js?97 (both boundary lines are deleted too).
# In sed's basic regular expressions "?" is a literal character, so "?97"
# is matched as written; the unescaped "." matches any single character.
# If no closing line is found, the range runs to the end of the input.
/<!-- Head Scripts -->/,/skins\/common\/ajax.js?97/ d

# Delete single lines that are not wanted in the local copy. Each command
# removes any line containing its pattern:
#   - the favicon link          (<link rel="shortcut icon")
#   - the search link           (<link rel="search")
#   - the Creative Commons link (<link title="Creative Commons")
#   - the copyright link        (<link rel="copyright"; gnu.org per the original note)
#   - the navigation jump links (any line mentioning jump-to-nav)
/<link rel="shortcut icon"/ d
/<link rel="search"/ d
/<link title="Creative Commons"/ d
/<link rel="copyright"/ d
/jump-to-nav/ d

# Remove stylesheet links. The pattern is not restricted to the print
# stylesheet: every line containing <link rel="stylesheet" is deleted.
/<link rel="stylesheet"/ d

# Remove inline CSS and JavaScript.
# The first two commands are range deletes: everything from the opening
# <style type="text/css" (or <script ...>) line through the first following
# line containing the closing tag is dropped, boundary lines included.
# NOTE(review): the <script> pattern contains a space after "type=", so it
# only matches tags written exactly as `<script type= "text/javascript">`;
# presumably that is how the wiki emits them - confirm against a saved page.
# The last two commands delete any line containing "wikibits js" or "site js".
/<style type="text\/css"/,/<\/style>/ d
/<script type= "text\/javascript">/,/<\/script>/ d
/wikibits js/ d
/site js/ d

# Tidy up remaining IE fixes: delete from a line containing "if lt IE 7"
# through the first following line containing "endif" (inclusive).
/if lt IE 7/,/endif/ d

# Remove the timestamp comment: delete any line containing
# "Saved in parser cache with key newbiedoc-nd".
/Saved in parser cache with key newbiedoc-nd/ d

# Insert a horizontal rule before the footer: an <hr/> line is emitted
# immediately before each line containing `Retrieved from "<a`.
# (This uses the one-line form of the "i" command, a GNU sed extension.)
/Retrieved from "<a/ i <hr\/>
# Correct the external link to the Berlios site for the URL of the retrieved
# page: \1 is the existing href and \2 is the link text. The replacement
# discards \1 and uses the link text for both the href and the visible text.
s/Retrieved from "<a href="\(.*\)">\(.*\)<\/a>"<\/div>/Retrieved from "<a href="\2">\2<\/a>"<\/div>/

# Remove the [edit] link from headings.
# The bracket contents are matched as one <a ...>text</a> element rather than
# with ".*". A greedy ".*" extends to the LAST "]</span>" on the line, so a
# heading whose own text ended in "]" (e.g. "Setup [optional]") was deleted
# together with its edit link. The g flag removes every edit-link span on
# the line.
# NOTE(review): assumes the span holds exactly one anchor, "[<a ...>edit</a>]"
# - confirm against a downloaded page.
s/<span class="editsection">\[<a [^>]*>[^<]*<\/a>\]<\/span>//g
# Turn wiki links into local file links.
# First command: strip the leading /wiki/ from an href, so the link becomes
# relative to the current directory.
# Second command: append .html to the attribute value that ends just before
# a title= attribute, normalising any whitespace between them to one space.
# The pattern matches any `" title=`, whatever attribute precedes it.
# NOTE(review): neither substitution carries the g flag, so only the first
# match on each input line is rewritten - confirm that is intended.
s/<a href="\/wiki\//<a href="/
s/"\s*title=/\.html" title=/

# Add .html to local links that carry a #fragment: on lines where an href is
# followed by "#", insert .html in front of the "#". The ".*" is greedy, so
# the insertion point is the last "#" on the line, and only one substitution
# is made per line.
/href=".*#/ s/href="\(.*\)#/href="\1\.html#/
# Except user page links: for newbiedoc.berlios.de/wiki/User: URLs, remove a
# ".html" again. The greedy ".*" means the ".html" removed is the last one on
# the line after "User:".
s/newbiedoc.berlios.de\/wiki\/User:\(.*\)\.html/newbiedoc.berlios.de\/wiki\/User:\1/

# Remove code for menus etc. and tidy up the tags at the end of the document.
# All lines from "<!-- end content -->" through the first following line
# containing "Served in " are replaced by the single line of text given after
# "c \": a licence notice followed by the closing </div>, </body> and </html>
# tags. (One-line "c" text is a GNU sed extension.)
# If no "Served in " line follows, the range runs to the end of the input.
/<!-- end content -->/,/Served in / c \Content is available under <a href="http://www.gnu.org/copyleft/fdl.html" class="external " title="http://www.gnu.org/copyleft/fdl.html" rel="nofollow">GNU Free Documentation License 1.2</a>, unless otherwise stated.</div></div></div></div></body></html>
