##############################################################################
# 
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
# 
# Copyright (c) Digital Creations.  All rights reserved.
# 
# This license has been certified as Open Source(tm).
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
# 1. Redistributions in source code must retain the above copyright
#    notice, this list of conditions, and the following disclaimer.
# 
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions, and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
# 
# 3. Digital Creations requests that attribution be given to Zope
#    in any manner possible. Zope includes a "Powered by Zope"
#    button that is installed by default. While it is not a license
#    violation to remove this button, it is requested that the
#    attribution remain. A significant investment has been put
#    into Zope, and this effort will continue if the Zope community
#    continues to grow. This is one way to assure that growth.
# 
# 4. All advertising materials and documentation mentioning
#    features derived from or use of this software must display
#    the following acknowledgement:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    In the event that the product being advertised includes an
#    intact Zope distribution (with copyright and license included)
#    then this clause is waived.
# 
# 5. Names associated with Zope or Digital Creations must not be used to
#    endorse or promote products derived from this software without
#    prior written permission from Digital Creations.
# 
# 6. Modified redistributions of any form whatsoever must retain
#    the following acknowledgment:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    Intact (re-)distributions of any official Zope release do not
#    require an external acknowledgement.
# 
# 7. Modifications are encouraged but must be packaged separately as
#    patches to official Zope releases.  Distributions that do not
#    clearly separate the patches from the original work must be clearly
#    labeled as unofficial distributions.  Modifications which do not
#    carry the name Zope may be packaged in any form, as long as they
#    conform to all of the clauses above.
# 
# 
# Disclaimer
# 
#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#   SUCH DAMAGE.
# 
# 
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations.  Specific
# attributions are listed in the accompanying credits file.
# 
##############################################################################

#
# Specifically, most of this code is from PyXML's xml.dom.ext.Printer.
# See LICENSE.Fourthought.
#

"""
Printing and XML generating support for DOM classes.
"""
import re
import string

# Regexps used by _translateCdata(); kept at module level so that they
# are compiled only once.
# see http://www.xml.com/axml/target.html#dt-character
# Control characters 0x01-0x1F other than tab (0x09), LF (0x0A) and CR (0x0D).
ILLEGAL_LOW_CHARS = '[\x01-\x08\x0B-\x0C\x0E-\x1F]'
# Four-byte sequences: a lead byte in 0xF0-0xF7 followed by three
# continuation bytes in 0x80-0xBF.  Defined here but NOT included in
# XML_ILLEGAL_CHAR_PATTERN below (see the note that follows).
SURROGATE_BLOCK = '[\xF0-\xF7][\x80-\xBF][\x80-\xBF][\x80-\xBF]'
# The byte sequences EF BF BE and EF BF BF, i.e. the UTF-8 encodings of
# U+FFFE and U+FFFF.
ILLEGAL_HIGH_CHARS = '\xEF\xBF[\xBE\xBF]'
# Note (uncertain): it looks as if characters from the surrogate block are
# allowed when in scalar form, which is encoded in UTF-8 the same way as in
# surrogate block form -- hence SURROGATE_BLOCK is left out of the pattern.
XML_ILLEGAL_CHAR_PATTERN = re.compile('%s|%s'%(ILLEGAL_LOW_CHARS, ILLEGAL_HIGH_CHARS))
# The characters that we will want to turn into entity references.
# We must do so for &, <, and for > when it follows ]].
# The xml parser has more leeway, but we're not the parser.
# http://www.xml.com/axml/target.html#dt-chardata
# Characters that we must *always* turn into entity references:
g_cdataCharPatternReq = re.compile('[&<]|]]>')
# Replacement text for each match of g_cdataCharPatternReq.
g_charToEntityReq = {
        '&': '&amp;',
        '<': '&lt;',
        ']]>': ']]&gt;',
        }
# Characters that we must turn into entity references in attribute values
# (the required set plus >, " and '):
g_cdataCharPattern = re.compile('[&<>"\']|]]>')
# Replacement text for each match of g_cdataCharPattern.
g_charToEntity = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
        ']]>': ']]&gt;',
        }

# HTML elements that are always minimized (written as '<TAG />') when
# empty; every other empty element is written with an explicit end tag
# in html mode.  Names are upper-case; callers upcase before the lookup.
# from PyXML's xml.dom.html
# http://www.w3.org/TR/xhtml1/#guidelines
HTML_FORBIDDEN_END = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME',
                      'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']

from DOM.Core import Node, XMLNS_NS, XML_NS
from DOM import Traversal
import sys

class Visitor:
    """Visit every node of a subtree in the order given by a TreeWalker.

    Each node is visited twice: once with start true before its children
    and once with start false after them.  The per-node-type visit*
    methods here only report the node through visitGeneric(); subclasses
    override them to do real work.
    """

    # Accepted values for the contentType argument (only meaningful in
    # html mode).  These aren't the types that the server returns;
    # 'html' covers '*/html', etc.
    contentTypes = ['html', 'xml']

    def __init__(self, root, stream = sys.stdout, encoding = None,
                 html = 0, contentType = None,
                 whatToShow = Traversal.NodeFilter.SHOW_ALL,
                 filter = None, entityReferenceExpansion = 1):
        """Set up a walker rooted at root.

        root -- node at which the traversal starts.
        stream -- object with a write() method that receives the output.
        encoding -- stored on the instance for use by subclasses.
        html -- true to enable html mode.
        contentType -- None, 'html' or 'xml'; anything else raises
            RuntimeError.
        whatToShow, filter, entityReferenceExpansion -- passed straight
            to createTreeWalker().
        """
        # Fall back to root itself when it has no ownerDocument
        # (presumably root is then the document -- it must provide
        # createTreeWalker either way).
        factory = root.ownerDocument or root
        self.tw = factory.createTreeWalker(root, whatToShow, filter,
                                           entityReferenceExpansion)
        self.stream = stream
        self.encoding = encoding
        self.html = html

        if contentType and contentType not in self.contentTypes:
            raise RuntimeError(
                "Bad content type %s; valid types are 'html', 'xml'"
                % contentType)
        self.contentType = contentType

        # Element/attribute name casing: in html mode, content type
        # 'html' upcases and 'xml' lowercases; otherwise names pass
        # through untouched.
        # http://www.w3.org/TR/xhtml1/#guidelines
        nameFilter = lambda name: name
        if html and contentType == 'html':
            nameFilter = string.upper
        elif html and contentType == 'xml':
            nameFilter = string.lower
        self.namePrint = nameFilter

        # Dispatch table: node type -> bound visit method.  Bound through
        # self, so subclass overrides are picked up.
        self.NODE_TYPES = {
            Node.ELEMENT_NODE: self.visitElement,
            Node.ATTRIBUTE_NODE: self.visitAttr,
            Node.TEXT_NODE: self.visitText,
            Node.CDATA_SECTION_NODE: self.visitCDATASection,
            Node.ENTITY_REFERENCE_NODE: self.visitEntityReference,
            Node.ENTITY_NODE: self.visitEntity,
            Node.PROCESSING_INSTRUCTION_NODE:
                self.visitProcessingInstruction,
            Node.COMMENT_NODE: self.visitComment,
            Node.DOCUMENT_NODE: self.visitDocument,
            Node.DOCUMENT_TYPE_NODE: self.visitDocumentType,
            Node.DOCUMENT_FRAGMENT_NODE: self.visitDocumentFragment,
            Node.NOTATION_NODE: self.visitNotation,
            }


    # ---- driving the walker ----

    def visitWhole(self):
        """Visit the walker's current node, its subtree, then the node again."""
        here = self.tw.currentNode
        self.visit(here, start = 1)
        self.visitChildren()
        self.visit(here, start = 0)

    # Calling the visitor instance runs the whole traversal.
    __call__ = visitWhole

    def visitChildren(self):
        """Visit each child subtree of the current node, left to right."""
        walker = self.tw
        if not walker.firstChild():
            return
        self.visitWhole()
        while walker.nextSibling():
            self.visitWhole()
        # Step back up so the walker ends where it started.
        walker.parentNode()

    # ---- per-node-type hooks; none of these move the walker ----

    def visitElement(self, node, start):
        self.visitGeneric(node)

    def visitAttr(self, node, start):
        self.visitGeneric(node)

    def visitText(self, node, start):
        self.visitGeneric(node)

    def visitCDATASection(self, node, start):
        self.visitGeneric(node)

    def visitEntityReference(self, node, start):
        self.visitGeneric(node)

    def visitEntity(self, node, start):
        self.visitGeneric(node)

    def visitProcessingInstruction(self, node, start):
        self.visitGeneric(node)

    def visitComment(self, node, start):
        self.visitGeneric(node)

    def visitDocument(self, node, start):
        self.visitGeneric(node)

    def visitDocumentType(self, node, start):
        self.visitGeneric(node)

    def visitDocumentFragment(self, node, start):
        self.visitGeneric(node)

    def visitNotation(self, node, start):
        self.visitGeneric(node)

    def visitGeneric(self, node):
        """Default action: write one line naming the node's type and name."""
        self.stream.write("visit %s node %s\n"%(node.nodeType, node.nodeName))

    def visit(self, node, start):
        "Look up the visit method for node's type and call it."
        handler = self.NODE_TYPES.get(node.nodeType)
        if handler is not None:
            return handler(node, start)
        raise TypeError(
            "Cannot print unknown nodeType: %s" % node.nodeType)

# we'll want hooks to generalize this for prettyprinting etc:
# add formatting, named constants for < etc so we can &lt; etc.
class PrintVisitor(Visitor):
    """Generate XML (or HTML-flavoured markup) for a tree via a TreeWalker.

    Every visit* method writes to self.stream.  Most act only on the
    start visit (start true); elements and documents also write on the
    end visit.
    """

    def visitElement(self, node, start):
        """Write the start tag with its attributes, or the end tag.

        A childless element is closed on the start visit: '/>' in xml
        mode; in html mode ' />' for names in HTML_FORBIDDEN_END and an
        explicit '></name>' pair for everything else.
        """
        tagName = self.namePrint(node.tagName)
        if not start:
            # Childless elements were already closed on the start visit.
            if node.hasChildNodes():
                self.stream.write('</' + tagName + '>')
            return

        self.stream.write('<' + tagName)
        for item in node.attributes.values():
            self.visitAttr(item, start=1)
        if node.hasChildNodes():
            self.stream.write('>')
        elif not self.html:
            self.stream.write('/>')
        elif string.upper(node.tagName) in HTML_FORBIDDEN_END:
            self.stream.write(' />')
        else:
            self.stream.write('></' + tagName + '>')

    def visitAttr(self, node, start):
        """Write ' name=value' for a specified attribute, suitably quoted."""
        if start and node.specified:
            text, delimiter = _translateCdataAttr(node.value)
            self.stream.write(" %s=%s%s%s" %
                              (self.namePrint(node.name), delimiter,
                               text, delimiter))

    def visitText(self, node, start):
        """Write the text node's data with the required characters escaped."""
        if start:
            # encoding must go by keyword: the second positional
            # parameter of _translateCdata is allEntRefs, and passing the
            # encoding there switched on attribute-style escaping
            # whenever an encoding was set.
            self.stream.write(_translateCdata(node.data,
                                              encoding=self.encoding))

    def visitCDATASection(self, node, start):
        """Write a CDATA section, splitting it around any ']]>' in the data."""
        if start:
            self.stream.write('<![CDATA[')
            # ']]>' cannot appear inside a section: end the section after
            # ']]' and reopen a new one before '>'.
            self.stream.write(string.replace(node.data,
                                             "]]>", "]]]><![CDATA[]>"))
            self.stream.write(']]>')

    def visitEntityReference(self, node, start):
        """Write '&name;' for the reference."""
        if start:
            self.stream.write('&')
            self.stream.write(node.nodeName)
            self.stream.write(';')

    def visitEntity(self, node, start):
        """Write an <!ENTITY ...> declaration followed by a newline."""
        if start:
            st = "<!ENTITY " + node.nodeName
            if not node.systemId:
                # internal entity: its value is the first child's data.
                # An entity without children yields an empty value
                # instead of an AttributeError.
                child = node.firstChild
                s = ''
                if child is not None:
                    s = child.data
                # encoding by keyword -- see visitText.
                value = _translateCdata(s, encoding=self.encoding)
                # The value is written between double quotes, so a raw
                # double quote inside it must become a reference.
                value = string.replace(value, '"', '&quot;')
                st = '%s "%s"' % (st, value)
            if node.publicId:
                st = st + ' PUBLIC "%s"' % node.publicId
                if node.systemId:
                    st = '%s "%s"' % (st, node.systemId)
            elif node.systemId:
                st = st + ' SYSTEM "%s"' % node.systemId
            if node.notationName:
                st = st + ' NDATA %s' % node.notationName
            self.stream.write(st + '>\n')

    def visitNotation(self, node, start):
        """Write a <!NOTATION ...> declaration followed by a newline."""
        if start:
            st = "<!NOTATION %s" % node.nodeName
            if node.publicId:
                st = st + ' PUBLIC "%s"' % node.publicId
                if node.systemId:
                    st = '%s "%s"' % (st, node.systemId)
            elif node.systemId:
                st = st + ' SYSTEM "%s"' % node.systemId
            self.stream.write(st + '>\n')

    def visitProcessingInstruction(self, node, start):
        """Write '<?target data?>'; the data is written unescaped."""
        if start:
            self.stream.write('<?')
            self.stream.write(node.target + ' ')
            self.stream.write(node.data)
            self.stream.write('?>')

    def visitComment(self, node, start):
        """Write '<!--data-->'; the data is written unescaped."""
        if start:
            self.stream.write('<!--')
            self.stream.write(node.data)
            self.stream.write('-->')

    def visitDocument(self, node, start):
        """Write the XML declaration (xml mode only) and a final newline."""
        if start:
            if not self.html:
                self.stream.write('<?xml version="1.0"')
                if self.encoding:
                    self.stream.write(' encoding="%s"' % self.encoding)
                self.stream.write(' ?>\n')
        else:
            self.stream.write('\n') # Add a final newline

    def visitDocumentType(self, node, start):
        """Write the <!DOCTYPE ...> declaration.

        Nothing is written when the doctype has no entities, no
        notations and no system id.
        """
        if start:
            if not node.entities.length and not node.notations.length and \
               not node.systemId:
                return

            self.stream.write("<!DOCTYPE " + node.name)
            if node.systemId:
                # Pick whichever quote character does not occur in the
                # identifier being written.
                if node.publicId:
                    if '"' not in node.publicId:
                        self.stream.write(' PUBLIC "' + node.publicId + '" ')
                    else:
                        self.stream.write(" PUBLIC '" + node.publicId + "' ")
                else:
                    self.stream.write(' SYSTEM ')

                if '"' not in node.systemId:
                    self.stream.write('"' + node.systemId + '"')
                else:
                    self.stream.write("'" + node.systemId + "'")

            if node.internalSubset:
                # A stored internal subset is written verbatim.
                self.stream.write(" [%s]" % node.internalSubset)
            elif node.entities.length or node.notations.length:
                # Otherwise rebuild the subset from the entity and
                # notation maps.
                self.stream.write(' [\n')
                for i in range(node.entities.length):
                    self.visitEntity(node.entities.item(i), start=1)
                for i in range(node.notations.length):
                    self.visitNotation(node.notations.item(i), start=1)
                self.stream.write(']')
            self.stream.write('>\n')

    def visitDocumentFragment(self, node, start):
        """Write nothing; only the fragment's children produce output."""
        pass   # we're just here for the children

def _translateCdata(characters, allEntRefs = None, encoding='UTF-8'):
    """Translate characters into a legal format."""
    if not characters:
        return ''
    if allEntRefs: # translate all chars to entrefs; for attr value
        if g_cdataCharPattern.search(characters):
            new_string = g_cdataCharPattern.subn(
                lambda m, d=g_charToEntity: d[m.group()],
                characters)[0]
        else:
            new_string = characters
    else: # translate only required chars to entrefs
        if g_cdataCharPatternReq.search(characters):
            new_string = g_cdataCharPatternReq.subn(
                lambda m, d=g_charToEntityReq: d[m.group()],
                characters)[0]
        else:
            new_string = characters
    # This was never used, & I never got it anyway - prettyprinting?
    #if prev_chars[-2:] == ']]' and characters[0] == '>':
    #    new_string = '&gt;' + new_string[1:]
    # Note: use decimal char entity rep because some browsers are broken
    # FIXME: This will bomb for high characters.
    # Should, for instance, detect the UTF-8 for 0xFFFE
    # and put out &#xFFFE;
    if XML_ILLEGAL_CHAR_PATTERN.search(new_string):
        new_string = XML_ILLEGAL_CHAR_PATTERN.subn(
            lambda m: '&#%i;' % ord(m.group()),
            new_string)[0]
    #new_string = utf8_to_code(new_string, encoding) # XXX ugh
    return new_string

def _translateCdataAttr(characters):
    """
    Translate attribute value characters into a legal format;
    return the value and the delimiter used.
    """
    if not characters:
        return '', '"'
    if '"' not in characters or "'" in characters:
        delimiter = '"'
        new_chars = _translateCdata(characters, allEntRefs = 1)
        new_chars = re.sub("&apos;", "'", new_chars)
    else:
        delimiter = "'"
        new_chars = _translateCdata(characters, allEntRefs = 1)            
        new_chars = re.sub("&quot;", '"', new_chars)
    #FIXME: There's more to normalization
    #Convert attribute new-lines to character entity
    # characters is possibly shorter than new_chars (no entities)
    # I think this was a prettyprinting issue, newlines aren't illegal
    # http://www.xml.com/axml/target.html#NT-AttValue
    #if "\n" in characters:
    #    new_chars = re.sub('\n', '&#10;', new_chars)
    return new_chars, delimiter
