﻿# -*- coding: utf-8 -*-

import re
import base64
from time import time
from io import BytesIO

import tw_util
from tw_util import *
# Version value taken from tw_util and re-exported here; this module compares it
# against the tuple (15, 0, 0) to select the matching Agent._getEndpoint signature.
__TW_VER__ = tw_util.__TW_VER__

# When True, d_print() writes its arguments to stdout; when False it is silent.
MDEBUG = False

class TwRedirectAgent(BrowserLikeRedirectAgent):
	"""Redirect agent that notes every redirect target on the request headers.

	Before delegating to the base class, the resolved ``Location`` of the
	response is appended to the outgoing headers under the name
	``tw_location``.
	"""

	def _handleRedirect(self, response, method, uri, headers, redirectCount):
		"""Record the resolved redirect location, then let the base class redirect.

		Nothing is recorded when the response carries no ``Location`` header.
		"""
		rawLocations = response.headers.getRawHeaders('location', [])
		if rawLocations:
			# Only the first Location header is considered.
			headers.addRawHeader(
				'tw_location', self._resolveLocation(uri, rawLocations[0]))
		return BrowserLikeRedirectAgent._handleRedirect(
			self, response, method, uri, headers, redirectCount)

class WebClientContextFactory(ClientContextFactory):
	"""Context factory used by TwAgentHelper when no proxy is configured.

	Original author's note: do not verify https requests.
	"""

	def getContext(self, hostname, port):
		"""Return the base-class context; ``hostname`` and ``port`` are accepted but not forwarded."""
		context = ClientContextFactory.getContext(self)
		return context

class ProxyClientContextFactory(ClientContextFactory):
	"""Context factory with the same behaviour as WebClientContextFactory.

	Original author's note: do not verify https requests.
	"""

	def getContext(self, hostname, port):
		"""Return the base-class context; ``hostname`` and ``port`` are accepted but not forwarded."""
		context = ClientContextFactory.getContext(self)
		return context

class TunnelError(Exception):
	"""Signals that the proxy did not establish the requested HTTP CONNECT tunnel."""
	pass

class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):
	"""An endpoint that tunnels through proxies to allow HTTPS downloads. To
	accomplish that, this endpoint sends an HTTP CONNECT to the proxy.
	The HTTP CONNECT is always sent when using this endpoint, I think this could
	be improved as the CONNECT will be redundant if the connection associated
	with this endpoint comes from the pool and a CONNECT has already been issued
	for it.

	The TCP connection itself is made to the proxy (``proxyConf`` host/port);
	``host``/``port`` name the target that the CONNECT request asks for.
	"""

	# Raw string: ``\.`` must reach the regex engine as an escaped dot. In a
	# plain string literal it is an invalid escape sequence (deprecated in
	# Python 3); the compiled pattern is unchanged.
	_responseMatcher = re.compile(r'HTTP/1\.. 200')

	def __init__(self, reactor, host, port, proxyConf, contextFactory,
				timeout=30, bindAddress=None):
		"""
		proxyConf is a 3-tuple ``(proxyHost, proxyPort, proxyAuthHeader)``;
		the auth header value may be falsy, in which case no
		Proxy-Authorization line is sent with the CONNECT request.
		"""
		proxyHost, proxyPort, self._proxyAuthHeader = proxyConf
		super(TunnelingTCP4ClientEndpoint, self).__init__(reactor, proxyHost,
			proxyPort, timeout, bindAddress)
		# Fired with the protocol once the tunnel is up, or with a failure.
		self._tunnelReadyDeferred = Deferred()
		self._tunneledHost = host
		self._tunneledPort = port
		self._contextFactory = contextFactory

	def requestTunnel(self, protocol):
		"""Asks the proxy to open a tunnel."""
		tunnelReq = 'CONNECT %s:%s HTTP/1.1\r\n' % (self._tunneledHost,
												self._tunneledPort)
		if self._proxyAuthHeader:
			tunnelReq += 'Proxy-Authorization: %s\r\n' % self._proxyAuthHeader
		tunnelReq += '\r\n'
		protocol.transport.write(tunnelReq)
		# Temporarily hijack dataReceived so the proxy's answer to CONNECT is
		# handled here instead of by the HTTP client protocol.
		self._protocolDataReceived = protocol.dataReceived
		protocol.dataReceived = self.processProxyResponse
		self._protocol = protocol
		return protocol

	def processProxyResponse(self, bytes):
		"""Processes the response from the proxy. If the tunnel is successfully
		created, starts TLS on the transport and notifies the client that we
		are ready to send requests. If not, the tunnel deferred is errbacked
		with a TunnelError.

		Only the first chunk delivered by the transport is inspected.
		"""
		# Hand dataReceived back to the real protocol before doing anything else.
		self._protocol.dataReceived = self._protocolDataReceived
		if TunnelingTCP4ClientEndpoint._responseMatcher.match(bytes):
			self._protocol.transport.startTLS(self._contextFactory,
											self._protocolFactory)
			self._tunnelReadyDeferred.callback(self._protocol)
		else:
			self._tunnelReadyDeferred.errback(
				TunnelError('Could not open CONNECT tunnel.'))

	def connectFailed(self, reason):
		"""Propagates the errback to the appropriate deferred."""
		self._tunnelReadyDeferred.errback(reason)

	def connect(self, protocolFactory):
		"""Connect to the proxy, request the tunnel and return the deferred
		that fires once the tunnel is ready (or has failed).
		"""
		self._protocolFactory = protocolFactory
		connectDeferred = super(TunnelingTCP4ClientEndpoint,
								self).connect(protocolFactory)
		connectDeferred.addCallback(self.requestTunnel)
		connectDeferred.addErrback(self.connectFailed)
		return self._tunnelReadyDeferred

class TunnelingAgent(Agent):
	"""Agent whose connections are made through a
	L{TunnelingTCP4ClientEndpoint}, i.e. via an HTTP CONNECT tunnel.

	Agent rather than ProxyAgent is subclassed on purpose: once the tunnel is
	open the proxy is transparent to the client, so the agent should behave as
	if no proxy were involved.
	"""

	def __init__(self, reactor, proxyConf, contextFactory=None,
				connectTimeout=None, bindAddress=None, pool=None):
		"""Initialise the base Agent and remember the proxy configuration and
		context factory that are later handed to the tunneling endpoint.
		"""
		super(TunnelingAgent, self).__init__(reactor, contextFactory, connectTimeout, bindAddress, pool)
		self._contextFactory = contextFactory
		self._proxyConf = proxyConf

	# The _getEndpoint hook has a different signature, and the timeout/bind
	# address live in different places, depending on the version in __TW_VER__.
	if __TW_VER__ < (15, 0, 0):
		def _getEndpoint(self, scheme, host, port):
			"""Build the tunneling endpoint for ``host``:``port``; ``scheme`` is unused."""
			return TunnelingTCP4ClientEndpoint(
				self._reactor, host, port, self._proxyConf,
				self._contextFactory,
				timeout=self._connectTimeout,
				bindAddress=self._bindAddress)
	else:
		def _getEndpoint(self, uri):
			"""Build the tunneling endpoint for the host and port of ``uri``."""
			factory = self._endpointFactory
			return TunnelingTCP4ClientEndpoint(
				self._reactor, uri.host, uri.port, self._proxyConf,
				self._contextFactory,
				timeout=factory._connectTimeout,
				bindAddress=factory._bindAddress)

class Request(object):
	"""Description of a single HTTP request handed to TwAgent.download_request.

	Attributes set by the constructor:
	  method   -- upper-cased HTTP method string
	  url      -- request URL; must contain a ':' (scheme separator)
	  body     -- request body converted with tw_util.to_bytes (b'' when None)
	  headers  -- a Headers instance filled from the ``headers`` mapping
	  cookies  -- the given cookies object, or an empty dict
	  meta     -- per-request dict of options, created lazily
	  callback / errback -- stored as given; not invoked by this class
	"""

	def __init__(self, url, callback=None, method='GET', headers=None, body=None,
				cookies=None, meta=None, encoding='utf-8', errback=None):

		# The encoding must be set first: _set_body() reads it.
		self._encoding = encoding
		self.method = str(method).upper()
		self._set_url(url)
		self._set_body(body)

		assert callback or not errback, "Cannot use errback without a callback"
		self.callback = callback
		self.errback = errback

		self.cookies = cookies or {}
		self.headers = Headers()
		if headers:
			# items() works for plain dicts on both Python 2 and Python 3.
			for n, v in headers.items():
				self.headers.addRawHeader(n, v)

		# Copy so that later changes to request.meta do not leak to the caller's dict.
		self._meta = dict(meta) if meta else None

	@property
	def meta(self):
		"""Per-request options dict; created on first access."""
		if self._meta is None:
			self._meta = {}
		return self._meta

	def _get_url(self):
		return self._url

	def _set_url(self, url):
		"""Store ``url``; raise ValueError when it contains no ':'."""
		self._url = url

		if ':' not in self._url:
			raise ValueError('Missing scheme in request url: %s' % self._url)

	url = property(_get_url, _set_url)

	def _get_body(self):
		return self._body

	def _set_body(self, body):
		"""Store ``body`` as bytes; None becomes the empty byte string."""
		if body is None:
			self._body = b''
		else:
			self._body = tw_util.to_bytes(body, self.encoding)

	body = property(_get_body, _set_body)

	@property
	def encoding(self):
		return self._encoding

	def __str__(self):
		return "<%s %s>" % (self.method, self.url)

	__repr__ = __str__

	def copy(self):
		"""Return a copy of this Request"""
		return self.replace()

	def replace(self, **kwargs):
		"""Return a new request of the same class with selected fields replaced.

		Any constructor keyword (url, callback, method, headers, body, cookies,
		meta, encoding, errback) may be given; every field not given is taken
		from this request. ``meta`` is copied by the constructor; the
		``cookies`` object is passed through as is.

		This method was missing although copy() relied on it, so copy()
		always raised AttributeError.
		"""
		inherit_headers = 'headers' not in kwargs
		current = {
			'url': self._url,
			'callback': self.callback,
			'method': self.method,
			'body': self._body,
			'cookies': self.cookies,
			'meta': self._meta,
			'encoding': self._encoding,
			'errback': self.errback,
		}
		for name, value in current.items():
			kwargs.setdefault(name, value)

		clone = self.__class__(**kwargs)
		if inherit_headers:
			# self.headers is a Headers object, not the mapping __init__
			# expects, so the raw header lists are transferred directly.
			for name, values in self.headers.getAllRawHeaders():
				clone.headers.setRawHeaders(name, list(values))
		return clone

class _ResponseReader(Protocol):
	"""Body-collecting protocol: buffers every chunk and, when the connection
	ends, fires ``finished`` with ``(txresponse, body, flags)``.
	"""

	def __init__(self, finished, txresponse, request):
		"""Keep the deferred, response and request and start an empty buffer."""
		self._request = request
		self._txresponse = txresponse
		self._finished = finished
		self._bytes_received = 0
		self._bodybuf = BytesIO()

	def dataReceived(self, bodyBytes):
		"""Append one body chunk to the buffer and count its length."""
		self._bodybuf.write(bodyBytes)
		self._bytes_received += len(bodyBytes)

	def connectionLost(self, reason):
		"""Fire the deferred according to why the body delivery stopped.

		ResponseDone -> flags None; PotentialDataLoss -> flags ['partial'];
		any other reason is passed to errback. Nothing happens when the
		deferred has already been called.
		"""
		done = self._finished
		if done.called:
			return

		payload = self._bodybuf.getvalue()
		if reason.check(ResponseDone):
			flags = None
		elif reason.check(PotentialDataLoss):
			flags = ['partial']
		else:
			done.errback(reason)
			return
		done.callback((self._txresponse, payload, flags))

class TwAgent(object):
	"""Builds the agent (plain, proxy or CONNECT-tunneling, optionally wrapped
	with cookie and redirect handling) for a Request and runs the download.

	The class attributes below name the agent classes that are instantiated,
	so a subclass can substitute its own.
	"""

	_Agent = Agent
	_ProxyAgent = ProxyAgent
	_TunnelingAgent = TunnelingAgent
	_RedirectAgent = TwRedirectAgent
	_CookieAgent = CookieAgent

	def __init__(self, contextFactory=None, connectTimeout=10, bindAddress=None, pool=None, followRedirect=False, redirectLimit=None, gzip_decoding=False, cookies=None):
		"""Store the configuration used for every request.

		``gzip_decoding`` is accepted but not used by this class.
		"""
		self._contextFactory = contextFactory
		self._connectTimeout = connectTimeout
		self._bindAddress = bindAddress
		self._pool = pool
		self._cookies = cookies
		self._followRedirect = followRedirect
		self._redirectLimit = redirectLimit
		self._file_download = False
		# Most recently scheduled timeout call; kept only for code that still
		# calls _cb_timeout() without passing the delayed call explicitly.
		self._timeout_cl = None

	def _get_agent(self, request, timeout):
		"""Return the innermost agent for ``request``.

		With ``request.meta['proxy']`` set: a tunneling agent for https URLs
		(unless the proxy parameters contain 'noconnect'), otherwise a proxy
		agent. Without a proxy: a plain agent.
		"""
		bindaddress = request.meta.get('bindaddress') or self._bindAddress
		proxy = request.meta.get('proxy')
		if proxy:
			_, _, proxyHost, proxyPort, proxyParams = tw_util._parse(proxy)
			scheme = tw_util._parse(request.url)[0]
			omitConnectTunnel = proxyParams.find('noconnect') >= 0
			if scheme == 'https' and not omitConnectTunnel:
				# The proxy credentials travel with the CONNECT request only.
				proxyConf = (proxyHost, proxyPort, request.headers.getRawHeaders("Proxy-Authorization", [None])[0])
				return self._TunnelingAgent(reactor, proxyConf,
					contextFactory=self._contextFactory, connectTimeout=timeout,
					bindAddress=bindaddress, pool=self._pool)
			else:
				endpoint = TCP4ClientEndpoint(reactor, proxyHost, proxyPort, timeout=timeout, bindAddress=bindaddress)
				return self._ProxyAgent(endpoint)

		return self._Agent(reactor, contextFactory=self._contextFactory,
			connectTimeout=timeout, bindAddress=bindaddress, pool=self._pool)

	def download_request(self, request):
		"""Issue ``request`` and return a deferred firing with the result of
		_cb_bodydone(); it fails with TimeoutError when the timeout elapses.

		The timeout is ``request.meta['download_timeout']`` or, when that is
		missing/falsy, the connect timeout given to the constructor.
		"""
		timeout = request.meta.get('download_timeout') or self._connectTimeout
		agent = self._get_agent(request, timeout)

		# Must be checked on the innermost agent: once it is wrapped by the
		# cookie/redirect agents below the isinstance test no longer matches
		# and the proxy credentials would be sent through the tunnel to the
		# origin server.
		if isinstance(agent, self._TunnelingAgent):
			request.headers.removeHeader('Proxy-Authorization')

		if self._cookies is not None:
			agent = self._CookieAgent(agent, self._cookies)

		# 'getlocation' requests want the Location header of the first
		# response, so redirects are not followed for them.
		if self._followRedirect and not request.meta.get('getlocation'):
			agent = self._RedirectAgent(agent, redirectLimit=self._redirectLimit)

		# request details
		url = urldefrag(request.url)[0]
		method = request.method
		bodyproducer = StringProducer(request._body) if request._body else None

		d_print('req url:',method,url)
		d_print('postdata:',request.body)
		start_time = time()
		d = agent.request(method, url, headers=request.headers, bodyProducer=bodyproducer)
		# set download latency
		d.addCallback(self._cb_latency, request, start_time)
		# response body is ready to be consumed
		d.addCallback(self._cb_bodyready, request)
		d.addCallback(self._cb_bodydone, request, url)

		# check download timeout
		if timeout:
			# The delayed call is handed to the callback per request. Keeping
			# it only on self let concurrent requests on one TwAgent cancel or
			# inspect each other's timers.
			timeout_cl = reactor.callLater(timeout, d.cancel)
			self._timeout_cl = timeout_cl
			d.addBoth(self._cb_timeout, request, url, timeout, timeout_cl)
		return d

	def _cb_timeout(self, result, request, url, timeout, timeout_cl=None):
		"""Cancel the pending timeout call and pass ``result`` through; raise
		TimeoutError when the timeout call has already run.

		``timeout_cl`` is the delayed call belonging to this request; when
		omitted, the one last stored on the instance is used.
		"""
		if timeout_cl is None:
			timeout_cl = self._timeout_cl
		if timeout_cl.active():
			timeout_cl.cancel()
			return result
		raise TimeoutError("Getting %s took longer than %s seconds." % (url, timeout))

	def _cb_latency(self, result, request, start_time):
		"""Store the seconds elapsed since ``start_time`` in request.meta['download_latency']."""
		request.meta['download_latency'] = time() - start_time
		return result

	def _cb_bodyready(self, txresponse, request):
		"""Return ``(txresponse, body, flags)``, directly for an empty
		response or via a deferred fed by _ResponseReader.
		"""
		# deliverBody hangs for responses without body
		if txresponse.length == 0:
			return txresponse, '', None

		def _cancel(_):
			# Cancelling the deferred (e.g. on timeout) drops the connection.
			txresponse._transport._producer.loseConnection()

		d = Deferred(_cancel)
		txresponse.deliverBody(_ResponseReader(d, txresponse, request))

		return d

	def _cb_bodydone(self, result, request, url):
		"""Shape the final result according to the request's meta flags.

		'addlocation': ``(body, location)`` where location is the first
		recorded 'tw_location' request header, or ``url`` when none was
		recorded. 'getlocation': the response phrase when it contains
		'Forbidden', else the response's Location header or ``url``.
		Otherwise: the body alone.
		"""
		txresponse, body, flags = result

		if request.meta.get('addlocation'):
			location = request.headers.getRawHeaders("tw_location", [""])[0]
			location = location if location else url
			return body, location
		elif request.meta.get('getlocation'):
			if 'Forbidden' in txresponse.phrase:
				return txresponse.phrase
			else:
				location = txresponse.headers.getRawHeaders("location", [""])[0]
				return location if location else url

		return body

class StringProducer:
	"""Body producer that hands an in-memory body to the consumer in one write."""
	implements(IBodyProducer)

	def __init__(self, body):
		"""Remember ``body`` and expose its length as ``length``."""
		self.length = len(body)
		self.body = body

	def startProducing(self, consumer):
		"""Write the whole body at once and return an already-fired deferred."""
		consumer.write(self.body)
		return succeed(None)

	def pauseProducing(self):
		"""No-op: the body is written in a single call."""

	def stopProducing(self):
		"""No-op: the body is written in a single call."""

class TwAgentHelper(object):
	"""Convenience wrapper holding one TwAgent plus the default headers,
	proxy URL and download timeout applied to every request it issues.
	"""

	DEBUG_HEADER = False
	_Agent = TwAgent

	def __init__(self, p_user=None, p_pass=None, proxy_url=None,  gzip_decoding=False, redir_agent=False, use_tls=False, cookieJar=None, redirectLimit=20, headers=None, connectTimeout=10, downloadTimeout=60):
		"""
		p_user / p_pass -- proxy credentials; used only when proxy_url is given
		                   and both are truthy (sent as Basic Proxy-Authorization)
		proxy_url       -- proxy to route requests through, or None
		redir_agent     -- passed to the agent as followRedirect
		headers         -- mapping of default request headers (copied)
		use_tls         -- accepted but not used by this class
		"""
		d_print( "Twisted Agent in use", __TW_VER__)
		self.headers = headers.copy() if headers else {}
		self.proxy_url = proxy_url
		self.useProxy = self.proxy_url is not None
		self.body = None
		self.downloadTimeout = downloadTimeout
		if self.useProxy:
			if p_user and p_pass:
				auth = 'Basic ' + base64.b64encode("%s:%s" % (p_user, p_pass)).strip()
				self.headers['Proxy-Authorization'] = auth
			contextFactory = ClientContextFactory()
		else:
			contextFactory = WebClientContextFactory()
		# Only the context factory differs between proxy and direct use.
		self.agent = self._Agent(contextFactory=contextFactory, followRedirect=redir_agent, redirectLimit=redirectLimit, gzip_decoding=gzip_decoding, cookies=cookieJar, connectTimeout=connectTimeout)

	def getRedirectedUrl(self, url, redir=True):
		"""Issue a HEAD request for ``url`` with the 'getlocation' flag set and
		return the agent's deferred. ``redir`` is accepted but not used.
		"""
		d_print( "getRedirectedUrl: ", url)
		request = Request(url, method='HEAD', headers=self.headers)
		request.meta['download_timeout'] = self.downloadTimeout or 60
		# Route through the configured proxy exactly like getWebPage() does;
		# without this the request went out directly and carried the
		# Proxy-Authorization header to the origin server.
		request.meta['proxy'] = self.proxy_url
		request.meta['getlocation'] = True
		return self.agent.download_request(request)

	def getWebPage(self, url, method='GET', postdata=None, addlocation=False):
		"""Request ``url`` and return the agent's deferred.

		``postdata`` becomes the request body; ``addlocation`` is stored in
		request.meta['addlocation'] for the agent to act on.
		"""
		d_print( "getWebPage:",url)
		request = Request(url, method=method, body=postdata, headers=self.headers)
		request.meta['download_timeout'] = self.downloadTimeout or 60
		request.meta['proxy'] = self.proxy_url
		request.meta['addlocation'] = addlocation
		d_print( 'agent type',self.agent)
		return self.agent.download_request(request)

class TwAgentFactory(TwAgentHelper):
	"""TwAgentHelper configured from getPage-style keyword arguments.

	``method``, ``postdata`` and ``addlocation`` are accepted so that the
	keyword set of twAgentGetPage() can be passed straight through; they are
	not used by the constructor itself.
	"""

	def __init__(self, method='GET', postdata=None, headers=None,
				agent="Twisted PageGetter", timeout=None, cookieJar=None,
				followRedirect=True, redirectLimit=20, gzip_decoding=True, addlocation=False, proxy_url=None, p_user=None, p_pass=None, download_timeout=None):
		# proxy_url / p_user / p_pass used to be accepted and then dropped;
		# they are now forwarded so a requested proxy is actually configured.
		TwAgentHelper.__init__(self, p_user=p_user, p_pass=p_pass, proxy_url=proxy_url, gzip_decoding=gzip_decoding, redir_agent=followRedirect, cookieJar=cookieJar, redirectLimit=redirectLimit, headers=headers, connectTimeout=timeout, downloadTimeout=download_timeout)
		if agent:
			self.headers['User-Agent'] = agent

def twAgentGetPage(url, **kwargs):
	"""Fetch ``url`` with a throw-away TwAgentFactory built from ``kwargs``.

	Recognised request options are 'method' (default 'GET'), 'postdata' and
	'addlocation' (default False); all keywords are also passed to
	TwAgentFactory. Returns the deferred of the download.
	"""
	twAgent = TwAgentFactory(**kwargs)
	# getWebPage() builds the same request this function used to build by
	# hand and additionally applies the helper's proxy setting.
	return twAgent.getWebPage(
		url,
		method=kwargs.get('method', 'GET'),
		postdata=kwargs.get('postdata'),
		addlocation=kwargs.get('addlocation', False))

def d_print(*args):
	"""Print the str() of all arguments, concatenated without separators,
	when the module-level MDEBUG flag is true; otherwise do nothing.
	"""
	if MDEBUG:
		# A single parenthesised argument is valid both as a Python 2 print
		# statement and as a Python 3 print() call, with identical output.
		print(''.join(str(arg) for arg in args))

# Names exported by ``from <this module> import *``.
# NOTE(review): "TwDownloadAgent" is not defined in the visible part of this
# file - confirm it is supplied elsewhere (e.g. by ``from tw_util import *``);
# a name listed here that does not exist makes a star-import of this module fail.
__all__ = ["__TW_VER__", "TwAgentHelper", "twAgentGetPage", "TwDownloadAgent"]