# Copyright (c) 2010 ActiveState Software Inc. All rights reserved.
"""
    pypm.common.package
    ~~~~~~~~~~~~~~~~~~~~~~~

    Represent Python packages available as source and in the binary
    repository.
"""

import os
import os.path as P
import sys
import tempfile
import json
import re
import tarfile
from contextlib import contextmanager
from contextlib import closing
import logging

from pkg_resources import Requirement
from pkg_resources import parse_version
from applib import sh
from applib import _simpledb
from applib.misc import xjoin

from pypm.common import supported

LOG = logging.getLogger(__name__)


class RequirementMixin:
    """Mixin that turns `install_requires` entries into requirement objects

    The host class is expected to provide `name` and `install_requires`
    attributes; `install_requires` is looked up by extra name, with the
    empty string as the key for the default (no extra) requirements.
    """

    def get_requirements(self, with_extras=(), exclude_default=False):
        """Yield a requirement object for every matching requirement string

        with_extras     - iterable of extra names to include (None is treated
                          as empty)
        exclude_default - if True, skip the default requirements stored under
                          the '' key

        This is a generator. An extra that is not present in
        `install_requires` is skipped after logging a debug message.
        """
        requested = tuple(with_extras or ())  # normalize
        if not exclude_default:
            requested = ('',) + requested
        # A set drops duplicate extra names so each one is visited once.
        for extra_name in set(requested):
            if extra_name not in self.install_requires:
                LOG.debug('package "%s" does not define a "%s" extra', 
                          self.name,
                          extra_name)
                continue
            for spec in self.install_requires[extra_name]:
                yield Requirement.parse(spec)
                

class SourcePackage(_simpledb.SimpleObject, RequirementMixin):
    """A source package

    This class represents the complete metadata of a source dist.

    Two instances compare equal (and hash alike) when their `__key__()`
    tuples match.
    """

    FIELDS = ['name', 'version',
              'summary', 'description', 'keywords', 'home_page', 'license',
              'author', 'author_email', 'maintainer', 'maintainer_email',
              'install_requires',
              # Note about the 'description' field:
              # The `description` field already exists in the base class
              # (`BinaryPackage`). Although it was meant to store the package
              # description (the one in PyPI and setup.py's
              # `long_description`), we abuse it to store PyPM-specific package
              # notes that get displayed to the user. We do this in order to
              # not have to introduce new database columns, which would break
              # database compatibility. It was a mistake that I did not add an
              # "extra" column to store additional data as a JSON string.
              # Therefore, this field will be that "extra" column, holding the
              # following JSON structure:
              #   self.description is {
              #       'notes': ['...', '...'],
              #   }
              ]
    
    def __setattr__(self, key, value):
        """Set an attribute, validating `name` before it is stored

        Raises ValueError when `name` contains anything other than lowercase
        ASCII letters, digits, '-' and '.'.
        """
        if key == 'name':
            # By adhering to setuptools' naming standards, we inherit its
            # hard-earned wisdom and, thus, avoid unexpected bugs related to
            # package naming.
            # NOTE(review): earlier notes here said the name must be 'safe'
            # according to pkg_resources.safe_name, must not contain a hyphen
            # and is case insensitive. The pattern below actually accepts
            # hyphens and rejects uppercase letters -- confirm which rule is
            # intended before tightening it.
            #
            # The pattern is a raw string so that `\-` and `\.` reach the regex
            # engine untouched instead of being invalid string escapes.
            if not re.match(r'^[a-z0-9\-\.]+$', value):
                raise ValueError('unsupported characters in package name: %s' % value)
        super(SourcePackage, self).__setattr__(key, value)
    
    @property
    def full_name(self):
        """Return a descriptive name including version

        This property is typically used for display/diagnose/logging purposes.
        """
        return '{0.name}-{0.version}'.format(self)

    def __key__(self):
        """Return an unique key representing this instance.
        
        By convention, you would return the tuple of primary keys. This method
        is, by inference, used in comparing this instance with others.
        """
        return self.name, self.version

    def __eq__(self, other):
        """Compare by `__key__()`

        Objects that do not provide `__key__` are not comparable; returning
        NotImplemented lets Python fall back to its default handling rather
        than failing with AttributeError.
        """
        if not hasattr(other, '__key__'):
            return NotImplemented
        return self.__key__() == other.__key__()
        
    def __hash__(self):
        """Hash by `__key__()` so that equal packages hash alike"""
        return hash(self.__key__())

    def __str__(self):
        """Return ``ClassName<full_name>``"""
        return '{0.__class__.__name__}<{0.full_name}>'.format(self)
        
    __repr__ = __str__


# Package names that BinaryPackage.parse_filename always adds to the
# `pkgname_universe` it is given.
extra_pkgname_universe = set(('hdf5',))

class BinaryPackage(SourcePackage):
    """A binary package

    A binary package, in addition to the fields of a SourcePackage, has extra
    fields corresponding to architecture (`osarch`), the python version it
    was compiled for (`pyver`) and the package version itself
    (`pkg_version`).
    """

    FIELDS = SourcePackage.FIELDS + ['osarch', 'pyver', 'pkg_version']
    SEPARATOR = '_'
    EXTENSION = 'pypm'

    def make_filename(self):
        """Make a package filename

        The result has the form
        ``<name>-<version>_<osarch>_<pyver>_<pkg_version>.pypm``.
        """
        return self.SEPARATOR.join([
                '{0.name}-{0.version}'.format(self),
                self.osarch,
                self.pyver,
                str(self.pkg_version)]) + '.' + self.EXTENSION

    @classmethod
    def parse_filename(cls, filename,
                       pkgname_universe,
                       pyver_universe=supported.py_versions,
                       osarch_universe=supported.os_architectures):
        """Parse the filename generated by `make_filename`
        
        - filename          : package filename; any directory part is ignored
        - pkgname_universe  : non-empty iterable of canonical names for all
                              packages. Used for resolving ambiguities in
                              name/version splitting. It is not modified.
        - pyver_universe    : accepted for interface compatibility; the
                              current implementation does not consult it
        - osarch_universe   : collection of known os/architecture strings
        
        Return the tuple (name, version, osarch, pyver, pkg_version), all of
        them strings.

        Raise AssertionError for a filename that does not look like a package
        filename, and ParseError when osarch or the package name cannot be
        identified.
        
        Consider an example: aspyct-3-0_beta_2_macosx_2.6_1.pypm
        
        Here version number ('3-0_beta_2') is the most arbitrary part, and
        has to be parsed *last*.
        """
        assert pkgname_universe, 'pkgname_universe must not be empty'
        # Work on a private set. An in-place `+=` would grow the caller's list
        # on every call and would fail outright for a set or tuple.
        known_names = set(pkgname_universe).union(extra_pkgname_universe)
        filename = P.basename(filename)
        
        # 1. First extract pyver and pkg_version
        # NOTE(review): the pattern only accepts single-digit major/minor
        # python versions (e.g. '2.6') and is not anchored at the end --
        # confirm whether that is still sufficient.
        m = re.match(r'(.*)_(\d\.\d)_(\d+)\.' + cls.EXTENSION, filename)
        assert m, 'invalid filename: %s' % filename
        prelude, pyver, pkg_version = m.groups()
        assert int(pkg_version) >= 0, "pkg_version for %s is: %s" % (filename,
                                                                     pkg_version)
        
        # 2. Second extract osarch
        osarch, prelude = _match_longest_suffix(
            prelude, cls.SEPARATOR, osarch_universe, filename)
        
        # 3. Third, extract package name; what is left is the version number
        if prelude.count('-') == 1:
            name, version = prelude.split('-')
        else: # the ambiguous case ('-' in version number)
            name, version = _match_longest_prefix(
                prelude, '-', known_names, filename)
        
        return name, version, osarch, pyver, pkg_version
    
    @property
    def full_name(self):
        """Return a descriptive name including version

        This property is typically used for display/diagnose/logging purposes.
        """
        return '{0.name}-{0.printable_version}'.format(self)
    
    @property
    def printable_version(self):
        """More humane version string.
        
        Equivalent to self.version when self.pkg_version is 1 or less.
        
        Equivalent to self.version~self.pkg_version if self.pkg_version > 1 -
        because there are, now, more than one package with same self.version
        (albeit with different self.pkg_version's)
        """
        if int(self.pkg_version) > 1:
            return '{0.version}~{0.pkg_version}'.format(self)
        else:
            return self.version
        
    @property
    def version_key(self):
        """Version as key for use in comparison

        A tuple of the parsed `version` and the parsed `pkg_version`.
        """
        return (parse_version(str(self.version)),
                parse_version(str(self.pkg_version)))

    def get_notes(self, postinstall=None):
        """Yield the notes for this package

        Notes are read from the 'notes' list of the JSON document stored in
        `description`; nothing is yielded when `description` is empty.

        postinstall - if None (the default), yield every note; otherwise
                      yield only the notes whose 'postinstall' value equals
                      this argument
        """
        if self.description:
            extra = json.loads(self.description)
            for note in extra.get('notes', []):
                if postinstall is None or note['postinstall'] == postinstall:
                    yield note

    def __key__(self):
        """Return the SourcePackage key extended with `pkg_version`"""
        return super(BinaryPackage, self).__key__() + (self.pkg_version,)


class ParseError(Exception):
    """Raised when a string cannot be partitioned against a known universe"""


def _match_longest_suffix(s, sep, universe, filename):
    """Match the longest suffix with any element from `universe`
    
    Return the matching substring, and the rest 
    """
    parts = s.split(sep)
    for idx, part in list(list(enumerate(parts))):
        substr  = sep.join(parts[idx:])
        if substr in universe:
            rest = sep.join(parts[:idx])
            return substr, rest
    else:
        raise ParseError(
            'cannot partition: %s (filename was %s)' % (
                s, filename))

def _match_longest_prefix(s, sep, universe, filename):
    """Match the longest prefix with any element from `universe`
    
    Return the matching substring, and the rest 
    """
    parts = s.split(sep)
    for idx, part in list(reversed(list(enumerate(parts)))):
        substr  = sep.join(parts[:idx])
        if substr in universe:
            rest = sep.join(parts[idx:])
            return substr, rest
    else:
        raise ParseError(
            'cannot partition: %s (filename was %s)' % (
                s, filename))
    
    
# 
# RepoPackage
#

class RepoPackage(BinaryPackage):
    """A package that lives in the repository

    Only the repository-relative path is stored as a field; the absolute
    download URL is attached afterwards through `set_download_url`.
    """

    # WARNING: if you are going to modify the fields here, do a code search for
    # ``RepoPackage.create_from`` and fix them as well.
    FIELDS = BinaryPackage.FIELDS + [
        'relpath',     # Relative path to the *.pypm file in remote repository
        'tags',        # Tags for this package (whitespace separated string)
    ]

    def __init__(self, *args, **kwargs):
        """Initialize the package; the download URL starts out unset"""
        super(RepoPackage, self).__init__(*args, **kwargs)
        self.__download_url = None

    #
    # Late binding of the download URL
    # (a RepoPackage only has the 'relpath' field, which is not the absolute
    #  download URL)
    #
    @property
    def download_url(self):
        """Return the download URL of this package

        Raise ValueError if no URL has been set yet.
        """
        url = self.__download_url
        if url is not None:
            return url
        raise ValueError('download_url is unavailable')

    def set_download_url(self, url):
        """Attach the absolute download URL; `url` must not be None"""
        assert url is not None
        self.__download_url = url
        
    @property
    def requires_be_license(self):
        """Return True if this package requires a Business Edition license

        That is the case when 'be' is one of the whitespace separated tags.
        """
        tag_list = self.tags.split()
        return 'be' in tag_list
                     
    
    
#
# InstalledPackage
#

class InstalledPackage(BinaryPackage):
    """A package that was installed to a Python directory"""

    FIELDS = BinaryPackage.FIELDS + ['files_list']

    def get_files_list(self):
        """Return `files_list` with platform-specific path separators

        `files_list` always holds forward-slash separated paths (as on Unix).
        On Windows every forward slash is replaced by a backslash, which is
        the reason this function exists; elsewhere `files_list` is returned
        as it is.
        """
        if not sys.platform.startswith('win'):
            return self.files_list
        return [entry.replace('/', '\\') for entry in self.files_list]
    

#
# PackageFile
#

class PackageFile(object):
    """Binary package file used in PyPM

    Package files are of the format .tar.gz containing two more files inside:

     - data.tar.gz: contains the actual files to be extracted over sys.prefix
     - info.json: package metadata which is a dump of BinaryPackage fields
    """

    def __init__(self, path):
        """
        path - path to the .pypm file (stored as an absolute path)
        """
        self.path = P.abspath(path)

    def get_files_list(self):
        """Return the list of files in the package data

        Note: this returns the contents of data.tar.gz (excluding info.json)
        """
        with self._open_data() as tf:
            return tf.getnames()

    def extract_over(self, target_dir):
        """Extract the package data over ``target_dir``"""
        # NOTE(review): extractall() writes wherever the member names point.
        # If package files can come from an untrusted source, consider
        # validating member names or using tarfile's extraction filters.
        with sh.cd(target_dir):
            with self._open_data() as tf:
                tf.extractall()

    @contextmanager
    def extract_over2(self, target_dir):
        """Context manager variant of `extract_over`

        Yields the opened data tarfile while the current directory is
        ``target_dir``; the data is extracted only after the caller's
        ``with`` body has finished without raising.
        """
        with sh.cd(target_dir):
            with self._open_data() as tf:
                yield tf
                tf.extractall()

    @staticmethod
    def create(path, data_root, bpkg):
        """Create a PyPM package at ``path``
        
        The created package will contain:
        
            - data.tar.gz -- contents of ``data_root``
            - info.json   -- metadata of ``bpkg``
            
        Return the contents of info.json that was added to the package
        """
        # Exact type check: subclasses of BinaryPackage are rejected.
        assert type(bpkg) is BinaryPackage
        pkgroot = tempfile.mkdtemp('-pkgroot', 'pypm-')

        def pack_contents(fn, parentdir):
            """Pack the contents of directory ``parentdir`` into ``fn``"""
            sh.pack_archive(
                fn,
                [xjoin(parentdir, f) for f in os.listdir(parentdir)],
                parentdir)

        try:
            # generate info.json
            info_json = bpkg.to_json()

            # create the .tar.gz file (.pypm)
            pack_contents(xjoin(pkgroot, 'data.tar.gz'), data_root)
            # Close info.json before packing so that its contents are fully
            # flushed to disk when the archive is built.
            with open(xjoin(pkgroot, 'info.json'), 'w') as f:
                f.write(info_json)
            pack_contents(path, pkgroot)
        finally:
            # Remove the staging directory even when a step above failed.
            sh.rm(pkgroot)
        return info_json

    def to_binary_package(self):
        """Return an instance of ``BinaryPackage``"""
        return BinaryPackage.from_json(self.retrieve_info_json())
        
    def retrieve_info_json(self):
        """Retrieve info.json from the package file

        Return its contents decoded as UTF-8.
        """
        with self._open_pkg() as tf:
            with closing(tf.extractfile(tf.getmember('info.json'))) as ijf:
                return ijf.read().decode('utf-8')

    @contextmanager
    def _open_data(self):
        """Open data.tar.gz inside the package

        Yield the tarfile handle of the inner archive; both the inner and the
        outer archive are closed when the context exits.
        """
        # Ask tarfile to raise on fatal extraction errors instead of
        # ignoring them.
        errorlevel = 1
        
        # The outer package is opened through _open_pkg() only, so that every
        # handle opened here is also closed here.
        with self._open_pkg() as tf:
            with closing(tarfile.open(
                fileobj=tf.extractfile('data.tar.gz'),
                mode='r:gz',
                errorlevel=errorlevel)) as dtf:
                yield dtf

    @contextmanager
    def _open_pkg(self):
        """Open package and yield the tarfile handle

        The handle is closed when the context exits.
        """
        # Ask tarfile to raise on fatal extraction errors instead of
        # ignoring them.
        errorlevel = 1

        with closing(
            tarfile.open(self.path, 'r:gz', errorlevel=errorlevel)) as tf:
            yield tf

