#
# $Header: /usr/local/cvsroot/pythondoc/docregex.py,v 1.2 1999/05/01 01:01:15 daniel Exp $
#
# Copyright (C) Daniel Larsson
# All Rights Reserved.
#
# See copyright notice in the file 'LICENSE.TXT', which should have accompanied
# this distribution.

"""Parse utilities for python docstrings.

This module contains parse functions and regular expressions
for markups valid in python docstrings (valid = supported by
gendoc).
"""

import re
import string

# Characters that may appear in an identifier-like word: ASCII letters,
# digits and the underscore.
# 'string.ascii_letters' is used rather than 'string.letters': the latter
# is locale dependent and does not exist in Python 3, which made the whole
# module fail on import there.
letter_digit_ = string.ascii_letters + string.digits + '_'

# Delimiters that must surround an inline markup.
#   startm -- a whitespace character or the start of the string/line
#             in front of the markup (group 'start_char').
#   endm   -- a whitespace character, one of ',.:;!?' or the end of the
#             string/line after the markup (group 'end_char').
# Raw strings keep the regex escapes ('\s') from being treated as (invalid)
# Python string escapes; the resulting pattern text is unchanged.
startm = r'(?P<start_char>\s|^)'
endm = r'(?P<end_char>[\s,.:;!?]|$)'

# Indentation probe for docstrings.
# Skips the first line and the newline(s) that end it, then captures the
# run of spaces that follows in the group 'spaces'.  Only space characters
# are counted; tabs are not matched by the group.
leading_tab_regex = re.compile(
    ".*\n+"
    "(?P<spaces> *)"
)

# Docstring layout recommended by the Python manuals:
# """One-liner describing fun/class/module
# <empty line>
# Longer description of fun/class/module"""
#
# 'oneliner_regex' captures the first line in the group 'oneliner' and
# only matches when that line is followed by at least one empty (or
# spaces-only) line.
oneliner_regex = re.compile(
    "(?P<oneliner>.*)"
    "(\n *)+\n"
)
# 'oneliner_only_regex' matches everything up to the first newline.
oneliner_only_regex = re.compile(".*")

# Strong:
# format: "**strong text**"
#
# The marked text (group 'text') may not start or end with a space or tab,
# may not contain a newline or '*' in its interior, and is at least two
# characters long.  The markup must be delimited by 'startm' and 'endm'.
# Raw strings are used so that '\*' is not read as an (invalid) Python
# string escape; '\t' and '\n' are now interpreted by the regex engine
# instead of the Python parser, which matches the same characters.
strong_regex = re.compile(
    startm + r"\*\*(?P<text>[^ \t][^\n*]*[^ \t])\*\*" + endm,
    re.MULTILINE)

# Emphasized:
# format: "*emphasized*"
#
# Same rules as for strong text, with a single '*' on each side.
emph_regex = re.compile(
    startm + r"\*(?P<text>[^ \t][^\n*]*[^ \t])\*" + endm,
    re.MULTILINE)

# Bullet:
# format: "* bulleted list"
#         "- bulleted list"
#         "o bulleted list"
#
# Group 'bullet' holds the leading whitespace, the bullet character and the
# whitespace after it; group 'text' holds everything that follows.
# '[^\0]' (anything but NUL) is used so that 'text' also spans newlines.
bullet_regex = re.compile(r'(?P<bullet>\s*[o*-]\s+)(?P<text>[^\0]*)')

# Numbered list:
# format: "1. some text"
#         "a. some text"
#         "1  some text"
#
# ol_regex  -- dotted numbers or letters ("1.", "a.", "1.2."); 'text' may
#              span several lines.
# ol2_regex -- like ol_regex, but the number must be followed by spaces or
#              tabs and 'text' is limited to a single line.
# olp_regex -- plain digits without a dot ("1  some text").
# When the number has several dotted parts, group 'number' holds the last
# one only, since the group itself is what gets repeated.
ol_regex = re.compile(r'(\s*|^)(?P<number>(\d+|[a-zA-Z]+)\.)+\s+(?P<text>[^\0]*|$)')
# Raw string: the original plain literal relied on invalid string escapes
# ('\s', '\d', '\.'), which newer Python versions warn about.
ol2_regex = re.compile(r'\s*(?P<number>(\d+|[a-zA-Z]+)\.)+[ \t]+(?P<text>.*)$')
olp_regex = re.compile(r'\s*(?P<number>\d+)\s+(?P<text>[^\0]*|$)')

# Definition List:
# format: "term -- definition text"
#
# Group 'term' is the text in front of the ' -- ' separator (it cannot
# contain a newline); group 'definition' is everything after it.
dl_regex = re.compile(r'[ \t]*(?P<term>[^\n]+)[ \t]+--\s+(?P<definition>[^\0]*)')

# Hypertext link:
# format: '[this is a link]'
#
# 'hyperdef_word' is the "[text]" fragment shared by the link and the link
# definition patterns; the bracketed text is captured in the group 'text'.
# Raw strings keep the regex escapes from being read as (invalid) Python
# string escapes; the pattern text itself is unchanged.
hyperdef_word = r'\[(?P<text>.*?)\]'
hypertext_regex = re.compile(startm + hyperdef_word + endm)

# Hypertext definition:
# format: '.. [this is a link] http://www.anne.elk'
#
# The definition has to start at the beginning of a line and run to the
# end of that line (re.MULTILINE).  Groups:
#   text      -- the link text between the brackets
#   linkmode  -- everything up to and including the last ':' of the
#                target (e.g. 'http:'), or '' when there is no ':'
#   hyperlink -- the rest of the target
hyperdef_str = (r'^\.\.\s+' + hyperdef_word +
                r'\s+(?P<linkmode>(?:\S*:)|(?:))(?P<hyperlink>\S*)$')
hyperdef_regex = re.compile(hyperdef_str, re.MULTILINE)

# Section headings.
#
# Each pattern matches a heading keyword in any mix of upper and lower
# case, followed by ':' and nothing but optional whitespace up to the end.
# Raw strings keep '\s' from being read as an (invalid) Python string
# escape; the pattern text itself is unchanged.

# Example section: "Example:" or "Examples:"
#
example_regex = re.compile(r'[Ee][Xx][Aa][Mm][Pp][Ll][Ee][Ss]?:\s*$')

# Arguments section: "Argument:" or "Arguments:"
#
arguments_regex = re.compile(r'[Aa][Rr][Gg][Uu][Mm][Ee][Nn][Tt][Ss]?:\s*$')

# Exceptions section: "Exception:" or "Exceptions:"
#
exception_regex = re.compile(r'[Ee][Xx][Cc][Ee][Pp][Tt][Ii][Oo][Nn][Ss]?:\s*$')

# Usage section: "Usage:"
#
usage_regex = re.compile(r'[Uu][Ss][Aa][Gg][Ee]:\s*$')

# Inline code fragment:
# format: "'some code'"
#
# The text between the single quotes is captured (non-greedily) in the
# group 'text'; the quoted fragment must be delimited by 'startm' and 'endm'.
code_regex = re.compile("".join([startm, "'(?P<text>.+?)'", endm]))

# Some code heuristics
#
# codeheuristics_regex -- matches text that contains a '(' directly
#     preceded by a non-whitespace character (looks like a call), or that
#     starts with optional whitespace followed by '#' (looks like a comment).
#     Written as a raw string so that '\S', '\(' and '\s' are not read as
#     (invalid) Python string escapes; the pattern text is unchanged.
# pycode_regex -- matches text starting with the interactive prompt '>>>'.
#     Leading whitespace in front of the prompt is NOT accepted.
codeheuristics_regex = re.compile(r"(\S\()|(^\s*#)")
pycode_regex = re.compile(r"^>>>")

# Builtin doc heading
# format: 'name(args) -- docstring'
#
# Groups: 'name' (may be empty), 'args' (text between the parentheses) and
# 'docstring' (rest of the line after the '--' separator).
builtin_head_regex = re.compile(
    r'(?P<name>\w*)\((?P<args>[^)]*)\)[ ]*--[ ]*(?P<docstring>.*)')

# Email regex
# format: 'some.name@somewhere.com'
#
# 'ldd' matches a run of ASCII letters, digits and dots; an address is two
# such runs joined by '@'.  Other characters (e.g. '_' or '-') are not
# accepted on either side.
# 'string.ascii_letters' replaces 'string.letters', which is locale
# dependent and does not exist in Python 3.
ldd = '[' + string.ascii_letters + string.digits + '.' + ']+'
email_regex = re.compile(ldd + '@' + ldd)

def test():
    """Run a few of the module's regular expressions and print the results.

    This is a manual smoke test: it prints the groups found by
    'strong_regex', 'builtin_head_regex', 'hyperdef_regex' and
    'emph_regex' on sample input.  It takes no arguments and returns
    nothing.
    """
    # Local import: only this self-test needs it.
    from inspect import cleandoc

    s = """This is an **example** text, using all *markups* possible.
    Here's a list:

    * Bullet 1
      - Bullet 1.1
      - Bullet 1.2
    * Bullet 2
    * Bullet 3
      - Bullet 3.1

    A numbered list:

    1 Norwegian Blue
    2 Belgian Blue
    3 American Blues

    A differently numbered list:

    a. Using letters!
    b. Some people fancy that.
    c. Enough bullets...

    Some definitions:

    King Arthur -- Graham Chapman
    Sir Foobar -- Michael Palin

    You can **also** *use links*, such as [Monty Python].

    .. [Strange Link] foo.bar.etc
    .. [Monty Python] http://www.montypython.com/

    Example:

    pickles = pickle(cucumber)
    """
    # The sample is indented along with this function body, while
    # 'hyperdef_regex' only matches a definition at the very start of a
    # line.  Strip the docstring-style indentation first; otherwise the
    # search below finds nothing and 'm.groups()' fails on None.
    s = cleandoc(s)

    # Find strong text.  'search' is needed here: the markup is not at the
    # start of the string, so 'match' could never succeed.
    m = strong_regex.search(s)
    print(m.groups())

    # The print calls are written so that they produce the same output
    # whether 'print' is a statement or a function.
    m = builtin_head_regex.match('load(file) -- Load a pickle from ...')
    print('%s %s %s' % (m.group('name'), m.group('args'), m.group('docstring')))

    m = hyperdef_regex.search(s)
    print(m.groups())

    m = hyperdef_regex.search(".. [Strange Link] foo.bar.etc")
    print(m.groups())

    m = emph_regex.search("*test=foo*")
    print(m.groups())

# Run the smoke test when the module is executed as a script.
if __name__ == '__main__':
    test()

#
# $Log: docregex.py,v $
# Revision 1.2  1999/05/01 01:01:15  daniel
# Removed Windows style line endings.
#
# 
# *****************  Version 4  *****************
# User: Daniel       Date: 98-08-11   Time: 21:05
# Updated in $/Pythondoc
# - Fixed the hyperlink definition regex. Now it must be first on a line.
# - Added a crude "Python code" regex. It just checks if the line begins
# with '>>>'. It should allow whitespace in front...
# 
# *****************  Version 3  *****************
# User: Daniel       Date: 98-08-06   Time: 17:12
# Updated in $/Pythondoc
# Added email regular expression. Used to figure out if '__author__'
# contains one.
# 
# *****************  Version 2  *****************
# User: Daniel       Date: 98-07-31   Time: 2:33
# Updated in $/Pythondoc
# Fixed 'hyperdef_word' regexp definition.
# 
