# misc.py -- Miscellaneous utilities.
#
# Copyright (c) 2007-2009  Christian Hammond
# Copyright (c) 2007-2009  David Trowbridge
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#

import logging
import os
import zlib

try:
    import hashlib
    new_md5 = hashlib.md5
except ImportError:
    import md5
    new_md5 = md5.new

try:
    import cPickle as pickle
except ImportError:
    import pickle

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

from django.core.cache import cache
from django.conf import settings
from django.conf.urls.defaults import url, RegexURLPattern
from django.contrib.sites.models import Site
from django.db.models.manager import Manager
from django.utils.translation import ugettext as _
from django.views.decorators.cache import never_cache


DEFAULT_EXPIRATION_TIME = 60 * 60 * 24 * 30  # 1 month
CACHE_CHUNK_SIZE = 2**20 - 1024  # almost 1M (memcached's slab limit)

# memcached key size constraint (typically 250, but leave a few bytes for the
# large data handling)
MAX_KEY_SIZE = 240


class MissingChunkError(Exception):
    pass


def _cache_fetch_large_data(cache, key, compress_large_data):
    chunk_count = cache.get(key)

    data = []
    chunk_keys = ['%s-%d' % (key, i) for i in range(int(chunk_count))]
    chunks = cache.get_many(chunk_keys)

    for chunk_key in chunk_keys:
        try:
            data.append(chunks[chunk_key][0])
        except KeyError:
            logging.info('Cache miss for key %s.' % chunk_key)
            raise MissingChunkError

    data = ''.join(data)

    if compress_large_data:
        data = zlib.decompress(data)

    try:
        unpickler = pickle.Unpickler(StringIO(data))
        data = unpickler.load()
    except Exception, e:
        logging.warning("Unpickle error for cache key %s: %s." % (key, e))
        raise e

    return data


def _cache_store_large_data(cache, key, data, expiration, compress_large_data):
    # We store large data in the cache broken into chunks that are 1M in size.
    # To do this easily, we first pickle the data and compress it with zlib.
    # This gives us a string which can be chunked easily. These are then
    # stored individually in the cache as single-element lists (so the cache
    # backend doesn't try to convert binary data to utf8). The number of
    # chunks needed is stored in the cache under the unadorned key.
    file = StringIO()
    pickler = pickle.Pickler(file)
    pickler.dump(data)
    data = file.getvalue()

    if compress_large_data:
        data = zlib.compress(data)

    i = 0
    while len(data) > CACHE_CHUNK_SIZE:
        chunk = data[0:CACHE_CHUNK_SIZE]
        data = data[CACHE_CHUNK_SIZE:]
        cache.set('%s-%d' % (key, i), [chunk], expiration)
        i += 1

    cache.set('%s-%d' % (key, i), [data], expiration)
    cache.set(key, '%d' % (i + 1), expiration)
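

# Illustrative sketch of the chunked layout produced by the helpers above
# (not part of this module's API; the key name and sizes below are
# hypothetical examples). For a key 'stats' whose pickled (and optionally
# zlib-compressed) payload is about 2.5MB, the cache would hold:
#
#     'stats'   -> '3'                    (chunk count, stored as a string)
#     'stats-0' -> [<first ~1MB chunk>]   (single-element list of raw bytes)
#     'stats-1' -> [<next ~1MB chunk>]
#     'stats-2' -> [<remaining bytes>]
#
# _cache_fetch_large_data() reads the count from the bare key, fetches all
# chunk keys in one get_many() call, and raises MissingChunkError if any
# chunk has expired or been evicted.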


def cache_memoize(key, lookup_callable,
                  expiration=getattr(settings, "CACHE_EXPIRATION_TIME",
                                     DEFAULT_EXPIRATION_TIME),
                  force_overwrite=False,
                  large_data=False,
                  compress_large_data=True):
    """Memoize the results of a callable inside the configured cache.

    Keyword arguments:
    expiration          -- The expiration time for the key.
    force_overwrite     -- If True, the value will always be computed and
                           stored regardless of whether it exists in the
                           cache already.
    large_data          -- If True, the resulting data will be pickled,
                           compressed with zlib, and (potentially) split up
                           into megabyte-sized chunks. This is useful for
                           very large, computationally intensive hunks of
                           data which we don't want to store in a database
                           due to the way things are accessed.
    compress_large_data -- Compresses the data with zlib compression when
                           large_data is True.
    """
    key = make_cache_key(key)

    if large_data:
        if not force_overwrite and cache.has_key(key):
            try:
                data = _cache_fetch_large_data(cache, key,
                                               compress_large_data)
                return data
            except Exception, e:
                logging.warning('Failed to fetch large data from cache for '
                                'key %s: %s.' % (key, e))
        else:
            logging.info('Cache miss for key %s.' % key)

        data = lookup_callable()
        _cache_store_large_data(cache, key, data, expiration,
                                compress_large_data)
        return data
    else:
        if not force_overwrite and cache.has_key(key):
            return cache.get(key)

        data = lookup_callable()

        # Most people will be using memcached, and memcached has a limit
        # of 1MB. Data this big should be broken up somehow, so let's warn
        # about this. Users should hopefully be using large_data=True in
        # this case.
        #
        # XXX - since 'data' may be a sequence that's not a string/unicode,
        #       this can fail. len(data) might be something like '6' but the
        #       data could exceed a megabyte. The best way to catch this
        #       would be an exception, but while python-memcached defines an
        #       exception type for this, it never uses it, choosing instead
        #       to fail silently. WTF.
        if len(data) >= CACHE_CHUNK_SIZE:
            logging.warning("Cache data for key %s (length %s) may be too "
                            "big for the cache." % (key, len(data)))

        try:
            cache.set(key, data, expiration)
        except:
            pass

        return data
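

# Example usage (illustrative sketch; the key and callable below are
# hypothetical, not part of this module):
#
#     def _expensive_lookup():
#         return compute_something_slow()
#
#     result = cache_memoize('myapp-expensive-result', _expensive_lookup)
#
# The callable only runs on a cache miss (or when force_overwrite=True).
# The key is automatically run through make_cache_key(), which prefixes the
# current Site's domain and enforces the memcached key size limit. Pass
# large_data=True if the result may exceed memcached's ~1MB item limit.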


def make_cache_key(key):
    """Creates a cache key guaranteed to avoid conflicts and size limits.

    The cache key will be prefixed by the site's domain, and will be
    changed to an MD5SUM if it's larger than the maximum key size.
    """
    try:
        site = Site.objects.get_current()

        # The install has a Site app, so prefix the domain to the key.
        key = "%s:%s" % (site.domain, key)
    except:
        # The install doesn't have a Site app, so use the key as-is.
        pass

    # Adhere to the memcached key size limit.
    if len(key) > MAX_KEY_SIZE:
        digest = new_md5(key).hexdigest()

        # Replace the excess part of the key with a digest of the key.
        key = key[:MAX_KEY_SIZE - len(digest)] + digest

    # Make sure this is a non-unicode string, in order to prevent errors
    # with some backends.
    key = str(key)

    return key


def get_object_or_none(klass, *args, **kwargs):
    """Returns the object matching the given lookup arguments, or None.

    ``klass`` may be either a model class or a manager.
    """
    if isinstance(klass, Manager):
        manager = klass
        klass = manager.model
    else:
        manager = klass._default_manager

    try:
        return manager.get(*args, **kwargs)
    except klass.DoesNotExist:
        return None


def never_cache_patterns(prefix, *args):
    """
    Prevents any included URLs from being cached by the browser.

    It's sometimes desirable not to allow browser caching for a set of URLs.
    This can be used just like patterns().
    """
    pattern_list = []
    for t in args:
        if isinstance(t, (list, tuple)):
            t = url(prefix=prefix, *t)
        elif isinstance(t, RegexURLPattern):
            t.add_prefix(prefix)

        t._callback = never_cache(t.callback)
        pattern_list.append(t)

    return pattern_list


def generate_media_serial():
    """
    Generates a media serial number that can be appended to a media filename
    in order to make a URL that can be cached forever without fear of change.
    The next time the file is updated and the server is restarted, a new
    path will be accessed and cached.

    This will crawl the media files (using directories in MEDIA_SERIAL_DIRS if
    specified, or all of MEDIA_ROOT otherwise), figure out the latest
    timestamp, and store that value in settings.MEDIA_SERIAL.
    """
    MEDIA_SERIAL = getattr(settings, "MEDIA_SERIAL", 0)

    if not MEDIA_SERIAL:
        media_dirs = getattr(settings, "MEDIA_SERIAL_DIRS", ["."])

        for media_dir in media_dirs:
            media_path = os.path.join(settings.MEDIA_ROOT, media_dir)

            for root, dirs, files in os.walk(media_path):
                for name in files:
                    mtime = int(os.stat(os.path.join(root, name)).st_mtime)

                    if mtime > MEDIA_SERIAL:
                        MEDIA_SERIAL = mtime

        setattr(settings, "MEDIA_SERIAL", MEDIA_SERIAL)


def generate_ajax_serial():
    """
    Generates a serial number that can be appended to filenames involving
    dynamic loads of URLs in order to make a URL that can be cached forever
    without fear of change.

    This will crawl the template files (using directories in TEMPLATE_DIRS),
    figure out the latest timestamp, and store that value in
    settings.AJAX_SERIAL.
    """
    AJAX_SERIAL = getattr(settings, "AJAX_SERIAL", 0)

    if not AJAX_SERIAL:
        template_dirs = getattr(settings, "TEMPLATE_DIRS", ["."])

        for template_path in template_dirs:
            for root, dirs, files in os.walk(template_path):
                for name in files:
                    mtime = int(os.stat(os.path.join(root, name)).st_mtime)

                    if mtime > AJAX_SERIAL:
                        AJAX_SERIAL = mtime

        setattr(settings, "AJAX_SERIAL", AJAX_SERIAL)


def generate_cache_serials():
    """
    Wrapper around generate_media_serial and generate_ajax_serial
    to generate all serial numbers in one go.

    This should be called early in the startup, such as in the site's
    main urls.py.
    """
    generate_media_serial()
    generate_ajax_serial()
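

# Example usage (illustrative sketch; the import path and template snippet
# are assumptions, not defined by this module). A site would typically call
# generate_cache_serials() once at startup, near the top of its main urls.py:
#
#     # urls.py (hypothetical import path)
#     from djblets.util.misc import generate_cache_serials
#     generate_cache_serials()
#
# and then append the serial to media URLs so they can be cached forever,
# e.g. in a template:
#
#     <script src="{{MEDIA_URL}}js/foo.js?{{MEDIA_SERIAL}}"></script>
#
# Both serials are computed once and stored back into settings, so repeated
# calls are cheap.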