﻿# -*- coding: utf-8 -*-
# ------------------------------------------------------------
# smytvshow
# Tools for page scraping
# http://smystero.dlinkddns.org/smytvwhow/
# ------------------------------------------------------------

import urlparse, urllib2, urllib, httplib
import time
import os
import re
import socket

from xbmcutils import config
from xbmcutils import logger

CACHE_PATH = config.get_setting("cache.dir")
DEBUG_LEVEL = 0

def_headers = [['User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0']]
def_timeout = socket.getdefaulttimeout()


def cache_page(url, post=None, headers=def_headers, timeout=def_timeout):
    """Fetch ``url`` through download_page() and never raise.

    Despite its name, this function performs no caching of its own: it simply
    forwards to download_page() and converts any failure into an empty string.

    Parameters:
        url     -- address to request.
        post    -- request body; when not None the request is sent as a POST.
        headers -- sequence of [name, value] pairs sent as request headers.
        timeout -- socket timeout handed to download_page(); None means the
                   request is made without setting a timeout.

    Returns:
        Whatever download_page() returns, or "" if it raised an exception.
    """
    logger.debug("start...")
    try:
        data = download_page(url, post, headers, timeout=timeout)
    except Exception as e:
        # repr() is used instead of e.message: several network exceptions
        # (e.g. urllib2.URLError) leave .message empty, and repr() cannot
        # itself fail on non-ASCII exception text.
        logger.error(repr(e))
        data = ""

    logger.debug("end...")
    return data


def download_page(url, post=None, headers=def_headers, follow_redirects=True, timeout=10, header_to_get=None,
                  bypass_ddos=1):
    """Download ``url`` and return the response body (or one response header).

    Cookies are loaded from and saved to the file returned by
    get_cookies_file() whenever a cookie library can be imported.

    Parameters:
        url              -- address to request.
        post             -- request body; when not None the request is a POST.
        headers          -- sequence of [name, value] pairs sent with the request.
        follow_redirects -- when False (and cookielib is in use) redirects are
                            returned as-is through NoRedirectHandler.
        timeout          -- socket timeout for the request; None leaves the
                            process default untouched.
        header_to_get    -- when given, and the response carries a header whose
                            name (as yielded by iterating the response headers;
                            presumably lower-cased -- verify against callers)
                            equals it, that header's value is returned instead
                            of the body.
        bypass_ddos      -- 1 to attempt the CloudFlare challenge bypass on a
                            503 response; any other value disables the attempt.

    Returns:
        The (gunzipped, if needed) response body, or the requested header
        value. For HTTP error responses the error body is returned; a 503 that
        is recognised as a CloudFlare challenge and bypassed returns the result
        of the re-issued request.

    Raises:
        Anything other than urllib2.HTTPError raised by the request (for
        example urllib2.URLError or socket.timeout) propagates to the caller.

    Side effects:
        Installs a process-wide opener (install_opener), temporarily changes
        the socket default timeout, and rewrites the cookie file.
    """
    logger.debug('start...')

    if post is not None:
        logger.debug("Url requested via Post=[%s]" % post)
    else:
        logger.debug("Url requested via Get")

    cookies_file = get_cookies_file()
    logger.debug("Cookies file=[%s]" % cookies_file)

    cj = None
    ClientCookie = None
    cookielib = None

    # Choose a cookie backend: cookielib first, then ClientCookie, else none.
    try:
        logger.debug("Try using cookielib...")
        import cookielib
    except ImportError:
        try:
            logger.debug("Try using ClientCookie...")
            import ClientCookie
        except ImportError:
            logger.debug("No cookie library available")
            urlopen = urllib2.urlopen
            request = urllib2.Request
        else:
            logger.debug("ClientCookie available")
            urlopen = ClientCookie.urlopen
            request = ClientCookie.Request
            cj = ClientCookie.MozillaCookieJar()
    else:
        logger.debug("cookielib available")
        urlopen = urllib2.urlopen
        request = urllib2.Request

        try:
            cj = cookielib.MozillaCookieJar()
            cj.set_policy(MyCookiePolicy())
        except Exception as e:
            logger.debug(repr(e))

    if cj is not None:
        logger.debug("Cookie manager available.")
        if os.path.isfile(cookies_file):
            logger.debug("Importing cookies from [%s]" % cookies_file)
            try:
                cj.load(cookies_file, ignore_discard=True)
            except Exception:
                # An unreadable jar is discarded so the next save starts clean.
                logger.debug("Cookie file [%s] is damaged. Removed." % cookies_file)
                os.remove(cookies_file)

        if cookielib is not None:
            logger.debug("Using urllib2 with cookielib")
            handlers = [urllib2.HTTPHandler(debuglevel=DEBUG_LEVEL),
                        urllib2.HTTPCookieProcessor(cj)]
            if not follow_redirects:
                handlers.append(NoRedirectHandler())
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)
        else:
            logger.debug("Using ClientCookie with ClientCookie")
            opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
            ClientCookie.install_opener(opener)

    # Wall-clock start; used only for the timing line logged at the end.
    start = time.time()

    tx_headers = {}

    logger.debug("Adding headers")
    logger.debug("---------------------------------------------")
    for header in headers:
        logger.debug("Header %s=%s" % (str(header[0]), str(header[1])))
        tx_headers[header[0]] = header[1]
    logger.debug("---------------------------------------------")

    req = request(url, post, tx_headers)

    curr_timeout = socket.getdefaulttimeout()
    try:
        try:
            if timeout is None:
                logger.debug("Do the request WITHOUT timeout")
            else:
                logger.debug("Do the request WITH timeout=[%d]" % timeout)
                socket.setdefaulttimeout(timeout)
            handle = urlopen(req)
        finally:
            # The default timeout is process-wide: always put it back, even
            # when the request fails with something other than HTTPError.
            socket.setdefaulttimeout(curr_timeout)
        logger.debug("Done!")

        try:
            if cj is not None:
                cj.save(cookies_file, ignore_discard=True)  #  ,ignore_expires=True
                logger.debug("Cookies saved")

            info = handle.info()
            if info.get('Content-Encoding') == 'gzip':
                logger.debug("Content is gzipped")
                import StringIO
                import gzip

                compressed_stream = StringIO.StringIO(handle.read())

                logger.debug("Unzipping...")
                gzipper = gzip.GzipFile(fileobj=compressed_stream)
                data = gzipper.read()
                gzipper.close()
                logger.debug("Done!")
            else:
                logger.debug("Content is normal")
                data = handle.read()
        finally:
            handle.close()
    except urllib2.HTTPError as e:
        logger.error("HTTP Error: " + str(e.code))
        data = e.read()
        # Headers of the error response; needed by the header loop below when
        # a 503 is not bypassed and execution falls through.
        info = e.info()

        if e.code != 503:
            return data

        logger.debug("Error content =[%s]" % data)
        logger.debug("Checking for DDoS")
        if re.search('DDoS(.*?)CloudFlare', data) is not None:
            logger.debug("DDos CloudFlare")
            if bypass_ddos == 1:
                logger.debug("Try to bypass")
                logger.debug("Calculating the authorization url")
                new_url = bypass_DDoS(urlparse.urlparse(url), data)

                time.sleep(3)

                logger.debug("Try to get authorization cookies from url=[%s]" % new_url)
                # bypass_ddos=0: do not recurse into another bypass attempt.
                download_page(new_url, post, headers, False, timeout, header_to_get, 0)
                logger.debug("Cookie grabbed from authorization url. Reissuing original request.")
                # bypass_ddos=2: retry the original request exactly once.
                return download_page(url, post, headers, follow_redirects, timeout, header_to_get, 2)

    logger.debug("Getting requested headers")
    logger.debug("---------------------------------------------")
    for header in info:
        logger.debug("Header %s=%s" % (header, info[header]))
        if header_to_get is not None:
            if header == header_to_get:
                data = info[header]
    logger.debug("---------------------------------------------")

    fin = time.time()
    logger.debug("Downloaded in [%d] second(s)" % (fin - start + 1))

    return data


def bypass_DDoS(url, data):
    """Compute the CloudFlare challenge-answer URL for a 503 challenge page.

    Parameters:
        url  -- parsed URL of the original request (only ``url.netloc`` is
                used; the caller passes the result of urlparse.urlparse()).
        data -- body of the 503 response containing the challenge script.

    Returns:
        A ``http://<netloc>/cdn-cgi/l/chk_jschl?...`` URL carrying the
        ``jschl_vc`` token, the ``pass`` value and the computed integer answer.

    Raises:
        AttributeError if any of the regular expressions below does not match:
        none of the search results is checked before ``.group(1)`` is called.

    NOTE(review): the arithmetic is evaluated with ``exec``/``eval`` on text
    taken from ``data``, i.e. from the remote server's response. A hostile or
    compromised server could presumably get arbitrary Python executed here --
    confirm the trust model or replace with a restricted evaluator.
    """

    # Challenge token: the first quoted 32-hex-digit string in the page.
    v = re.search('\"([a-fA-F\d]{32})\"', data)
    vc = v.group(1)
    logger.debug("DDos 1 %s " % vc)

    # Value of the form field named "pass".
    v = re.search('name="pass" value="([^"]+)"', data)
    xpass = v.group(1)
    logger.debug("DDos 1a %s " % xpass)

    # Initial declaration: the text between "f, " and the next ";".
    # The regexes below treat it as  <obj>={"<var>":<expr>}.
    v = re.search('f, (.*?);', data)
    init = v.group(1)
    logger.debug("DDos 2 %s " % init)

    # <var>: the first double-quoted name inside the declaration.
    v = re.search('\"(.*?)\"', init)
    var = v.group(1)
    logger.debug("DDos 3 %s " % var)

    # <expr>: the text between ":" and "}" inside the declaration.
    v = re.search('\:(.*?)\}', init)
    val = v.group(1)
    logger.debug("DDos 4 %s " % val)

    # Rewrite the JavaScript tricks into plain arithmetic text.
    val = jsParse(val)
    logger.debug("DDos 5 %s " % val)

    # Create a local variable named <var> holding the initial value.
    # The later exec/eval calls read and update that same local.
    l0 = var + '=' + val
    exec l0
    logger.debug("DDos 6 %s " % l0)

    # <obj>: everything before the first "=" in the declaration.
    v = re.search('(.*?)=', init)
    obj = v.group(1)
    logger.debug("DDos  7 %s " % obj)

    # The update statements sit between ";<obj>." and ";a.value".
    pattern = ';' + obj + '\.(.*?);a.value'
    v = re.search(pattern, data)
    expr = v.group(1)
    logger.debug("DDos  8 %s " % expr)

    # Drop the remaining "<obj>." prefixes so each statement starts at <var>.
    p = re.compile('(' + obj + '\.)')
    expr = p.sub('', expr)
    logger.debug("DDos  9 %s " % expr)

    # Apply each  <var><op>=<expr>  statement in order.
    p = re.compile(r';')
    lines = p.split(expr);
    for line in lines:
        logger.debug("DDos xx %s " % line)
        # Right-hand side: everything after the first "=".
        v = re.search('=(.*)', line)
        t = v.group(1)
        val1 = jsParse(t)
        logger.debug("DDos yy %s " % val1)

        # Operator: whatever sits between <var> and "=" (may be empty).
        v = re.search(var + '(.*?)=', line)
        oper = v.group(1)
        logger.debug("DDos zz %s " % oper)

        l0 = var + oper + '=' + val1
        exec l0
        logger.debug("DDos kk %s " % l0)

    # Final answer: the accumulated value plus the length of the host name.
    answer = eval(var + '+' + str(len(url.netloc)))
    logger.debug("DDos 10 %s " % answer)

    output = 'http://%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&pass=%s&jschl_answer=%d' % (url.netloc, vc, xpass, answer)

    return output


def jsParse(s):
    """Rewrite obfuscated JavaScript arithmetic in ``s`` as plain arithmetic text.

    Three substitutions are applied, in this order:
      * ``!![]`` and ``!+[]`` become ``1``
      * ``+[])`` becomes ``)*10``
      * ``()`` becomes ``(0)``

    Returns the rewritten string; ``s`` itself is not modified.
    """
    # Order matters: later patterns operate on the output of earlier ones.
    rewrites = (
        (r'\!\!\[\]|\!\+\[\]', '1'),
        (r'\+\[\]\)', ')*10'),
        (r'\(\)', '(0)'),
    )
    translated = s
    for pattern, replacement in rewrites:
        translated = re.sub(pattern, replacement, translated)
    return translated


import cookielib


class MyCookiePolicy(cookielib.DefaultCookiePolicy):
    def set_ok(self, cookie, request):
        #logger.debug("set_ok Cookie "+repr(cookie)+" request "+repr(request))
        #cookie.discard = False
        #cookie.
        devuelve = cookielib.DefaultCookiePolicy.set_ok(self, cookie, request)
        #logger.debug("set_ok "+repr(devuelve))
        return devuelve

    def return_ok(self, cookie, request):
        #logger.debug("return_ok Cookie "+repr(cookie)+" request "+repr(request))
        #cookie.discard = False
        devuelve = cookielib.DefaultCookiePolicy.return_ok(self, cookie, request)
        #logger.debug("return_ok "+repr(devuelve))
        return devuelve

    def domain_return_ok(self, domain, request):
        #logger.debug("domain_return_ok domain "+repr(domain)+" request "+repr(request))
        devuelve = cookielib.DefaultCookiePolicy.domain_return_ok(self, domain, request)
        #logger.debug("domain_return_ok "+repr(devuelve))
        return devuelve

    def path_return_ok(self, path, request):
        #logger.debug("path_return_ok path "+repr(path)+" request "+repr(request))
        devuelve = cookielib.DefaultCookiePolicy.path_return_ok(self, path, request)
        #logger.debug("path_return_ok "+repr(devuelve))
        return devuelve
'''

        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Host': 'www.meristation.com',
        'Accept-Language': 'es-es,es;q=0.8,en-us;q=0.5,en;q=0.3',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Keep-Alive': '300',
        'Connection': 'keep-alive'}
'''

def get_filename_from_url(url):
    import urlparse

    parsed_url = urlparse.urlparse(url)
    try:
        filename = parsed_url.path
    except:
        if len(parsed_url) >= 4:
            filename = parsed_url[2]
        else:
            filename = ""

    return filename


def get_cookies_file():
    """Return the path of the cookie jar file inside the plugin directory.

    The plugin directory (``config.plugin_dir``) is created with mode 0777 if
    it does not exist yet; the cookie file itself is not created here.
    """
    plugin_dir = config.plugin_dir
    if not os.path.exists(plugin_dir):
        os.makedirs(plugin_dir, 0o777)
    return os.path.join(plugin_dir, 'cookies.dat')


def checkUrl(url):
    """Return True when a HEAD request for ``url`` answers with a status below 400.

    The connection type follows the URL scheme (HTTPS for ``https``, plain
    HTTP otherwise), the query string is included in the request, and an empty
    path is sent as "/".

    Raises:
        Connection-level errors (socket.error, httplib.HTTPException, ...)
        are not caught and propagate to the caller.
    """
    p = urlparse.urlparse(url)
    if p.scheme == 'https':
        conn = httplib.HTTPSConnection(p.netloc)
    else:
        conn = httplib.HTTPConnection(p.netloc)

    # An empty request target is not a valid HTTP request line.
    target = p.path or '/'
    if p.query:
        target += '?' + p.query

    try:
        conn.request('HEAD', target)
        resp = conn.getresponse()
        return resp.status < 400
    finally:
        conn.close()


class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl

    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302


