# Copyright (c) 2010 ActiveState Software Inc. All rights reserved.
"""
    net.py
    ~~~~~~
    
    Contains a single `download_file` function to download from the network,
    with support for:

    1. progress bar (via applib.textui)
    2. ETag
    3. download cache
"""

from __future__ import unicode_literals

import os.path as P
import logging
import json
from datetime import datetime
from collections import namedtuple

from applib import sh
from applib import textui
import six
import six.moves
if six.PY3:
    from base64 import encodebytes
else:
    from base64 import encodestring as encodebytes

from pypm.common.util import BareDateTime
from pypm.common.util import get_user_agent

__all__ = ['download_file', 'DownloadError']

LOG = logging.getLogger(__name__)

    
def download_file(url, target_dir, urlgetter_kargs=None, interactive=True):
    """Fetch ``url`` and store it inside ``target_dir``

    The local file name is the last path component of ``url`` (any
    ``#anchor`` suffix is dropped first).

    - urlgetter_kargs: optional dict of keyword arguments forwarded to the
      downloader's ``get(...)`` method
    - interactive: when True a ``DownloaderWithProgress`` is used (progress
      bar); otherwise a plain ``Downloader``

    Raise ``DownloadError`` when there is an error

    Return a tuple ``(downloaded_file, actually_downloaded?)`` where the
    second item is whatever the downloader's ``get`` returned.
    """
    assert '\n' not in url, 'url has whitespace'

    # everything from the first '#' onwards is the anchor; discard it
    url = url.partition('#')[0]

    getter_options = urlgetter_kargs or {}

    # basename apparently works on URLs too
    # http://stackoverflow.com/questions/1112545/
    destination = P.abspath(P.join(target_dir, P.basename(url)))

    LOG.debug('Downloading %s', url)
    downloader_class = DownloaderWithProgress if interactive else Downloader
    was_downloaded = downloader_class(url).get(destination, **getter_options)
    return destination, was_downloaded


# raised by `Downloader.get`
class DownloadError(Exception):
    """Single exception type used to report network related download failures"""
    

# -- internal support code

class URLGetter(object):
    """A fine-grained URL getter. An improved `urllib.urlretrieve`

    Subclasses can override the ``hook_*`` methods to observe the individual
    stages of a download (see `get`).
    """

    def __init__(self, url):
        """
        - url: URL to download
        """
        self.url = url

    def get(self,
            target_file,
            start_info=None,
            use_cache=False,
            save_properties=False,
            auth=None):
        """Download the URL and save it to `target_file`

        Data is first written to ``target_file + '.part'`` and moved over
        `target_file` once the transfer is finished. Appropriate hook
        functions are automatically invoked.

        - start_info: message to print (LOG.info) when transfer begins, cache
          is validated or an error occurs; it can also be a callable. A
          string is formatted with ``status`` ('Hit' or 'Get'); a callable is
          called with that status and its (truthy) return value is logged.

        - use_cache: if True, download will not actually happen if `target_file`
          is already available *and* its ETag is not changed in the server. For
          this to work, the previous call to `get` must have passed
          `save_properties=True` (in order to save the ETag).

        - save_properties: if True, a file named .$target_file.urlprops
          containing the response headers and other metadata will be saved
          along side the target_file. (This is required for `use_cache` in
          future downloads)

        - auth: (username, password) -- optional http basic auth data

        Return True only if the download actually happened; False when the
        server answered 304 (not modified) to a cache validation request.

        HTTP errors other than that 304 are re-raised unchanged.
        """
        def invoke_start_info(status):
            if not start_info:
                return
            if six.callable(start_info):
                i = start_info(status)
            else:
                i = start_info.format(status=status)
            if i:
                LOG.info(i)

        # Nothing to validate against if the target does not exist yet
        if not P.exists(target_file):
            use_cache = False

        urlprops = URLProperties(target_file)
        props = urlprops.load()
        if props:
            # write back the new value of `last_attempt_utc` *now* so we don't
            # have to deal with it when an exception arises later.
            # last_attempt_utc is simply the time of the last download attempt
            props.custom['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(props.headers, props.custom)

        # NOTE(review): the `six.moves` attributes used below (Request,
        # urlopen, HTTPError, urllib_version) look project-specific; upstream
        # six appears to expose the urllib names under `six.moves.urllib.*`.
        # Confirm which `six` is bundled.
        if use_cache and props:
            # Enable the cache header `If-None-Match`
            etag = props.headers.get('ETag', props.headers.get('etag', None))
            if etag:
                req = six.moves.Request(self.url, headers={'If-None-Match': etag})
            else:
                LOG.warning('no ETag in last headers: %s', props.headers)
                req = six.moves.Request(self.url)
        else:
            req = six.moves.Request(self.url)

        if auth:
            username, password = auth
            req.add_header('Authorization', _create_http_basicauth_header(
                username, password
            ))

        # Set User-Agent
        # XXX: (in 2.6) urllib2.py does not expose its default user-agent string
        # so we copy-paste that code here (from urllib2.OpenerDirector)
        urllib2_user_agent = "Python-urllib/%s" % six.moves.urllib_version
        pypm_user_agent = get_user_agent(urllib2_user_agent)
        assert pypm_user_agent
        req.add_header('User-Agent', pypm_user_agent)

        try:
            u = six.moves.urlopen(req)
        except six.moves.HTTPError as e:
            if e.code == 304 and use_cache:
                invoke_start_info('Hit')
                return False # file not changed in server
            else:
                invoke_start_info('Get')
                raise
        else:
            invoke_start_info('Get')

        bs = 1024*8 # block size; from urllib.py:urlretrieve

        # Always close the response, even if a hook, the read or the write
        # fails, so the connection is not left open until garbage collection.
        try:
            headers = u.info()

            # detect total size of the file to be downloaded
            if 'Content-Length' in headers:
                total_size = int(headers['Content-Length'])
                assert total_size >= 0
            else:
                total_size = None

            total_bytes_transferred = 0
            chunk_size = 0

            # Hook 1: initialize
            self.hook_initialize(total_size)

            with open(target_file + '.part', 'wb') as f:
                while True:
                    if total_size:
                        assert total_bytes_transferred <= total_size, str((
                            total_bytes_transferred, total_size))

                    # Hook 2: transferring; reports the chunk read in the
                    # previous iteration (0 on the very first call)
                    self.hook_transferring(chunk_size, total_bytes_transferred)

                    data = u.read(bs)
                    chunk_size = len(data)

                    if chunk_size == 0:
                        break

                    f.write(data)
                    total_bytes_transferred += chunk_size
        finally:
            u.close()

        # The .part file is closed at this point; move it into place
        sh.mv(target_file + '.part', target_file)

        # Hook 3: completed
        self.hook_completed()

        # save the new headers and other properties
        if save_properties:
            # reuse existing props.custom (if any, and non-empty)
            custom_dict = (props.custom if props else None) or {}
            custom_dict['last_attempt_utc'] = BareDateTime.to_string(datetime.utcnow())
            urlprops.save(dict(headers), custom_dict)

        return True

    #
    # hooks for various download events
    #
    def hook_initialize(self, total_size):
        """Called just before the download starts

        - total_size: total size of the URL; None if it is unknown
        """

    def hook_transferring(self, chunk_size, total_bytes_transferred):
        """Called while the download happens

        - chunk_size: bytes transferred in this read
        - total_bytes_transferred: total bytes transferred so far
        """

    def hook_completed(self):
        """Called when the download is complete and the target file is closed"""


class URLProperties(object):
    """Side-car properties file kept next to a downloaded URL.

    The file is named `.{filename}.urlprops` and lives in the same directory
    as the downloaded file. It holds a JSON dictionary with exactly two keys:

      - headers: the HTTP headers dictionary
      - custom: any custom data stored as a dictionary

    It allows download-related data to be looked up again later on.

    To have this file created automatically, invoke URLGetter like::

        >>> URLGetter.get(..., save_properties=True)
    """

    Data = namedtuple('URLProperties_Data', 'headers custom')

    def __init__(self, target_file):
        """
        - target_file: Local file where the URL is/would be downloaded
        """
        self.target_file = target_file

    @property
    def urlprops_filename(self):
        """Path of the properties file; `target_file` must be absolute"""
        assert P.isabs(self.target_file), 'not a absolute path: %s' % self.target_file
        directory, basename = P.split(self.target_file)
        hidden_name = '.{basename}.urlprops'.format(basename=basename)
        return P.join(directory, hidden_name)

    def load(self):
        """Read back what a previous `save` call stored.

        Return a `Data` namedtuple holding the `headers` and `custom` dicts.

        Return None when either the target file or its urlprops file does
        not exist.
        """
        if not P.exists(self.target_file):
            return None

        props_path = self.urlprops_filename
        if not P.exists(props_path):
            return None

        LOG.debug('loading urlprops: %s', props_path)
        with open(props_path) as stream:
            payload = json.load(stream)

        assert tuple(sorted(payload.keys())) == ('custom', 'headers'), \
            'invalid keys: %s' % payload.keys()
        return self.Data(payload['headers'], payload['custom'])

    def save(self, headers_dict, custom_dict):
        """Write headers_dict and custom_dict to the urlprops file

        The target file must already exist.
        """
        assert P.exists(self.target_file)
        props_path = self.urlprops_filename
        LOG.debug('writing urlprops: %s', props_path)
        payload = {'headers': headers_dict, 'custom': custom_dict}

        # Serialize once up front: a JSON error must surface *before* the
        # existing file is opened for writing (and thereby truncated).
        json.dumps(payload)

        with open(props_path, 'w') as stream:
            json.dump(payload, stream)
        
        
class Downloader(URLGetter):
    """URLGetter variant that reports URL errors as ``DownloadError``

    Lets callers deal with such failures through a single try..except..
    block. ``HTTPError`` is not wrapped; it reaches the caller as is.
    """

    def get(self, *args, **kwargs):
        """Same as `URLGetter.get`, but re-raise ``URLError`` as ``DownloadError``"""
        try:
            outcome = super(Downloader, self).get(*args, **kwargs)
        except six.moves.HTTPError:
            # propagate HTTP events untouched. This clause must stay ahead
            # of the URLError one (presumably HTTPError is a URLError
            # subclass, as in the stdlib -- confirm for the bundled six).
            raise
        except six.moves.URLError as err:
            message = "Failed to download <%s>. Reason: %s" % (self.url, err)
            raise DownloadError(message)
        return outcome


class DownloaderWithProgress(Downloader):
    """Downloader that drives a textui progress bar from the download hooks"""

    def hook_initialize(self, total_size):
        """Create the progress bar, or disable it when the size is unknown"""
        if total_size is not None:
            self.progress = textui.ProgressBar(
                total_size, show_size=_byteshr, note='downloading')
        else:
            # XXX: progress bar cannot be used as total_size is unknown
            # TODO: so, implement a progress bar with optional total_size
            self.progress = None

    def hook_transferring(self, chunk_size, total_bytes_transferred):
        """Advance the progress bar (if any) by `chunk_size`"""
        if not self.progress:
            return
        self.progress.tick(items=chunk_size)

    def hook_completed(self):
        """Close the progress bar (if any)"""
        if not self.progress:
            return
        self.progress.close()

        
def _byteshr(bytes):
    """Human-readable version of bytes count"""
    for x in ['bytes','KB','MB','GB','TB']:
        if bytes < 1024.0:
            return "%3.1f%s" % (bytes, x)
        bytes /= 1024.0
    raise ValueError('cannot find human-readable version')


def _create_http_basicauth_header(username, password):
    s = '{0}:{1}'.format(username, password)
    base64string = encodebytes(s.encode('utf8'))[:-1]
    authheader = six.b('Basic ') + base64string
    return authheader