##  Copyright (C) 1998,1999  marduk <marduk@python.net>

##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.

##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.

##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# $Id: httplink.py,v 1.6 1999/02/26 01:55:08 marduk Exp $

# $Log: httplink.py,v $
# Revision 1.6  1999/02/26 01:55:08  marduk
# ACCEPTS all mime types in mimetypes.py
#
# Identify itself as Linbot x.x in HEAD requests
#
# Revision 1.5  1999/02/21 16:39:51  marduk
# 1.0b8
#
# Revision 1.4  1999/01/10 21:58:19  marduk
# Changed self.* to link.* @line 86 in httplink.py
#
# Revision 1.3  1999/01/10 01:02:16  marduk
# Linbot 1.0b6
#
# Revision 1.2  1998/12/31 03:49:14  marduk
# This is linbot 1.0b5.  See CHANGES
#
# Revision 1.1.1.1  1998/12/20 23:17:12  marduk
# initial 1.0
#

"""This module defines the functions needed for creating Link objects for urls
using the http scheme"""

__version__='1.0'
__author__ = 'marduk (marduk@python.net)'

import myUrlLib
import string
import httplib
import urllib
import time
import urlparse
import mimetypes

# Local shortcuts to the shared Link class and configuration object that
# live in myUrlLib.
Link = myUrlLib.Link
config = myUrlLib.config

# Proxy map handed to the opener and consulted by get_reply().  When the
# configuration leaves PROXIES as None, use whatever urllib.getproxies()
# reports instead.
proxies = config.PROXIES
if proxies is None:
    proxies = urllib.getproxies()

# Module-level counter of redirects followed so far.
redirect_depth = 0

# Shared opener used by get_document(); it announces itself as Linbot.
opener = urllib.FancyURLopener(proxies)
opener.addheaders = [('User-agent','Linbot ' + __version__)]

def get_reply(url):
    """Open connection to url and report information given by HEAD command"""

    global redirect_depth
    parsed = urlparse.urlparse(url)
    if proxies and proxies.has_key('http'):
	host = urlparse.urlparse(proxies['http'])[1]
	document = url

    else:
	host = parsed[1]
    	document = string.join(parsed[2:4],'')

    if not document: document = '/'

    h = httplib.HTTP(host)


    h.putrequest('HEAD',document)
    h.putrequest('User-Agent','Linbot %s' % __version__)
    h.putheader('Host',host)
    for mime_type in mimetypes.types_map.values():
        h.putheader('Accept',mime_type)
    h.endheaders()

    errcode, errmsg, headers = h.getreply()
    h.close()
    #print errcode
    #print errmsg
    if errcode == 301 or errcode == 302:
	redirect_depth = redirect_depth + 1
	if redirect_depth > 5:  # hardcoded, will remove later
	    print '\tToo many redirects!'
	    redirect_depth = 0
	    return (errcode, errmsg, headers, url)
        redirect = headers['location']
	redirect = urlparse.urljoin(url,redirect)
	if redirect == url:
	    print '\tRedirect same as source: %s' % redirect
	    redirect_depth = 0
	    return (errcode, errmsg, headers, url)
	print '\tRedirected to: ' + redirect
	if Link.linkList.has_key(redirect):
	    link = Link.linkList[redirect]
	    return (link.status, link.message, link.headers, link.URL)
        return get_reply(redirect)
    return (errcode, errmsg, headers, url)

def init(self, url, parent):
    """Fill in a link object from the HEAD reply for an http url.

    Here, self is a reference to the link object that is calling this
    pseudo-method.  url is joined to parent with myUrlLib.basejoin() and
    the result is passed to get_reply().

    Attributes set on self: status, message, headers and URL (taken from
    get_reply), type (the reply's content type, or 'text/html' when the
    headers object has no gettype method) and size (Content-Length as an
    integer, or 0 when it is missing or unusable).  The link is recorded
    in Link.linkList under its final URL.

    If the status is not 200 the link is reported through
    self.set_bad_link() and nothing further is done.  Otherwise self.age
    is set to the number of whole days since Last-Modified, but only when
    that header is present and can be converted to a timestamp.
    """
    target = myUrlLib.basejoin(parent, url)
    (self.status, self.message, self.headers, self.URL) = get_reply(target)
    Link.linkList[self.URL] = self

    try:
        self.type = self.headers.gettype()
    except AttributeError:
        self.type = 'text/html' # is this a good enough default?

    #print '\tContent-type: ' + self.type
    try:
        self.size = int(self.headers['content-length'])
    except (KeyError, TypeError, ValueError):
        # Missing header, no usable headers object, or a value that is
        # not a number: treat the size as unknown.
        self.size = 0

    if self.status != 200:
        self.set_bad_link(self.URL, str(self.status) + ": " + self.message)
        return

    try:
        lastMod = time.mktime(self.headers.getdate('Last-Modified'))
    except (OverflowError, TypeError, ValueError):
        # getdate() gives None for an absent or unparsable header, which
        # mktime() rejects; out-of-range dates are skipped the same way.
        lastMod = None
    if lastMod:
        self.age = int((time.time() - lastMod) / myUrlLib.SECS_PER_DAY)

def get_document(url):
    """Fetch url through the shared opener and return its full contents.

    The response handle is closed and opener.cleanup() is called whether
    or not the read succeeds; any error from opening or reading the url
    is passed on to the caller.
    """
    try:
        handle = opener.open(url)
        try:
            return handle.read()
        finally:
            handle.close()
    finally:
        opener.cleanup()
