seafile_ynh/sources/seafile-server-3.1.1/seahub/thirdpart/Djblets-0.6.14.dev-py2.6.egg/djblets/util/misc.py
#
# misc.py -- Miscellaneous utilities.
#
# Copyright (c) 2007-2009 Christian Hammond
# Copyright (c) 2007-2009 David Trowbridge
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
import logging
import os
import zlib

try:
    import hashlib
    new_md5 = hashlib.md5
except ImportError:
    import md5
    new_md5 = md5.new

try:
    import cPickle as pickle
except ImportError:
    import pickle

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

from django.core.cache import cache
from django.conf import settings
from django.conf.urls.defaults import url, RegexURLPattern
from django.contrib.sites.models import Site
from django.db.models.manager import Manager
from django.utils.translation import ugettext as _
from django.views.decorators.cache import never_cache


DEFAULT_EXPIRATION_TIME = 60 * 60 * 24 * 30  # 1 month
CACHE_CHUNK_SIZE = 2**20 - 1024  # almost 1M (memcached's slab limit)

# memcached key size constraint (typically 250, but leave a few bytes for the
# large data handling)
MAX_KEY_SIZE = 240


class MissingChunkError(Exception):
    pass


def _cache_fetch_large_data(cache, key, compress_large_data):
    chunk_count = cache.get(key)
    data = []

    chunk_keys = ['%s-%d' % (key, i) for i in range(int(chunk_count))]
    chunks = cache.get_many(chunk_keys)

    for chunk_key in chunk_keys:
        try:
            data.append(chunks[chunk_key][0])
        except KeyError:
            logging.info('Cache miss for key %s.' % chunk_key)
            raise MissingChunkError

    data = ''.join(data)

    if compress_large_data:
        data = zlib.decompress(data)

    try:
        unpickler = pickle.Unpickler(StringIO(data))
        data = unpickler.load()
    except Exception, e:
        logging.warning("Unpickle error for cache key %s: %s." % (key, e))
        raise e

    return data


def _cache_store_large_data(cache, key, data, expiration, compress_large_data):
    # We store large data in the cache broken into chunks that are 1M in size.
    # To do this easily, we first pickle the data and compress it with zlib.
    # This gives us a string which can be chunked easily. These are then stored
    # individually in the cache as single-element lists (so the cache backend
    # doesn't try to convert binary data to utf8). The number of chunks needed
    # is stored in the cache under the unadorned key.
    file = StringIO()
    pickler = pickle.Pickler(file)
    pickler.dump(data)
    data = file.getvalue()

    if compress_large_data:
        data = zlib.compress(data)

    i = 0
    while len(data) > CACHE_CHUNK_SIZE:
        chunk = data[0:CACHE_CHUNK_SIZE]
        data = data[CACHE_CHUNK_SIZE:]
        cache.set('%s-%d' % (key, i), [chunk], expiration)
        i += 1

    cache.set('%s-%d' % (key, i), [data], expiration)
    cache.set(key, '%d' % (i + 1), expiration)
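
# Illustrative sketch (not part of the original module): for a hypothetical
# key 'mykey' whose pickled (and optionally compressed) payload spans two
# chunks, the layout written by _cache_store_large_data and read back by
# _cache_fetch_large_data looks roughly like:
#
#   'mykey'   -> '2'            (chunk count, stored as a string)
#   'mykey-0' -> [<first chunk, up to CACHE_CHUNK_SIZE bytes>]
#   'mykey-1' -> [<remaining bytes>]
#
# Each chunk is wrapped in a single-element list so the cache backend doesn't
# try to coerce the raw binary data to utf8.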


def cache_memoize(key, lookup_callable,
                  expiration=getattr(settings, "CACHE_EXPIRATION_TIME",
                                     DEFAULT_EXPIRATION_TIME),
                  force_overwrite=False,
                  large_data=False,
                  compress_large_data=True):
    """Memoize the results of a callable inside the configured cache.

    Keyword arguments:
    expiration          -- The expiration time for the key.
    force_overwrite     -- If True, the value will always be computed and
                           stored regardless of whether it exists in the
                           cache already.
    large_data          -- If True, the resulting data will be pickled,
                           compressed with zlib, and (potentially) split up
                           into megabyte-sized chunks. This is useful for
                           very large, computationally intensive hunks of
                           data which we don't want to store in a database
                           due to the way things are accessed.
    compress_large_data -- Compresses the data with zlib compression when
                           large_data is True.
    """
    key = make_cache_key(key)

    if large_data:
        if not force_overwrite and cache.has_key(key):
            try:
                data = _cache_fetch_large_data(cache, key,
                                               compress_large_data)
                return data
            except Exception, e:
                logging.warning('Failed to fetch large data from cache for '
                                'key %s: %s.' % (key, e))
        else:
            logging.info('Cache miss for key %s.' % key)

        data = lookup_callable()
        _cache_store_large_data(cache, key, data, expiration,
                                compress_large_data)
        return data
    else:
        if not force_overwrite and cache.has_key(key):
            return cache.get(key)

        data = lookup_callable()

        # Most people will be using memcached, and memcached has a limit of
        # 1MB. Data this big should be broken up somehow, so let's warn about
        # this. Users should hopefully be using large_data=True in this case.
        #
        # XXX - since 'data' may be a sequence that's not a string/unicode,
        #       this can fail. len(data) might be something like '6' but the
        #       data could exceed a megabyte. The best way to catch this
        #       would be an exception, but while python-memcached defines an
        #       exception type for this, it never uses it, choosing instead
        #       to fail silently. WTF.
        if len(data) >= CACHE_CHUNK_SIZE:
            logging.warning("Cache data for key %s (length %s) may be too "
                            "big for the cache." % (key, len(data)))

        try:
            cache.set(key, data, expiration)
        except:
            pass

        return data
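
# Example usage (a minimal sketch; 'get_expensive_stats',
# 'compute_stats_from_database' and the key name are hypothetical, not part
# of this module):
#
#   def get_expensive_stats():
#       return compute_stats_from_database()
#
#   stats = cache_memoize('myapp-expensive-stats', get_expensive_stats,
#                         large_data=True)
#
# On a cache hit the callable is skipped entirely; on a miss (or when
# force_overwrite=True) it is invoked and the result is stored for
# 'expiration' seconds.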


def make_cache_key(key):
    """Creates a cache key guaranteed to avoid conflicts and size limits.

    The cache key will be prefixed by the site's domain, and will be
    changed to an MD5SUM if it's larger than the maximum key size.
    """
    try:
        site = Site.objects.get_current()

        # The install has a Site app, so prefix the domain to the key.
        key = "%s:%s" % (site.domain, key)
    except:
        # The install doesn't have a Site app, so use the key as-is.
        pass

    # Adhere to memcached key size limit
    if len(key) > MAX_KEY_SIZE:
        digest = new_md5(key).hexdigest()

        # Replace the excess part of the key with a digest of the key
        key = key[:MAX_KEY_SIZE - len(digest)] + digest

    # Make sure this is a non-unicode string, in order to prevent errors
    # with some backends.
    key = str(key)

    return key
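
# Illustrative example (assuming a Site whose domain is 'example.com', which
# is not defined in this module):
#
#   make_cache_key('user-count')  ->  'example.com:user-count'
#
# A key longer than MAX_KEY_SIZE keeps its leading characters and has the
# overflowing tail replaced by an MD5 hex digest, so the result still fits
# within the memcached key limit.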


def get_object_or_none(klass, *args, **kwargs):
    if isinstance(klass, Manager):
        manager = klass
        klass = manager.model
    else:
        manager = klass._default_manager

    try:
        return manager.get(*args, **kwargs)
    except klass.DoesNotExist:
        return None
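
# Example usage (a sketch; 'User' stands in for any model class or manager):
#
#   user = get_object_or_none(User, username='admin')
#   if user is None:
#       ...  # handle the missing row without a try/except DoesNotExist block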


def never_cache_patterns(prefix, *args):
    """
    Prevents any included URLs from being cached by the browser.

    It's sometimes desirable not to allow browser caching for a set of URLs.
    This can be used just like patterns().
    """
    pattern_list = []

    for t in args:
        if isinstance(t, (list, tuple)):
            t = url(prefix=prefix, *t)
        elif isinstance(t, RegexURLPattern):
            t.add_prefix(prefix)

        t._callback = never_cache(t.callback)
        pattern_list.append(t)

    return pattern_list
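
# Example usage in a urls.py (a sketch; the view prefix and URL patterns are
# hypothetical):
#
#   urlpatterns = never_cache_patterns('myapp.views',
#       (r'^activity/$', 'activity'),
#       (r'^status/$', 'status'),
#   )
#
# Each resulting pattern has its callback wrapped in Django's never_cache
# decorator, so browsers won't cache responses from these URLs.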


def generate_media_serial():
    """
    Generates a media serial number that can be appended to a media filename
    in order to make a URL that can be cached forever without fear of change.
    The next time the file is updated and the server is restarted, a new
    path will be accessed and cached.

    This will crawl the media files (using directories in MEDIA_SERIAL_DIRS if
    specified, or all of MEDIA_ROOT otherwise), figuring out the latest
    timestamp, and store that value in settings.MEDIA_SERIAL.
    """
    MEDIA_SERIAL = getattr(settings, "MEDIA_SERIAL", 0)

    if not MEDIA_SERIAL:
        media_dirs = getattr(settings, "MEDIA_SERIAL_DIRS", ["."])

        for media_dir in media_dirs:
            media_path = os.path.join(settings.MEDIA_ROOT, media_dir)

            for root, dirs, files in os.walk(media_path):
                for name in files:
                    mtime = int(os.stat(os.path.join(root, name)).st_mtime)

                    if mtime > MEDIA_SERIAL:
                        MEDIA_SERIAL = mtime

        setattr(settings, "MEDIA_SERIAL", MEDIA_SERIAL)


def generate_ajax_serial():
    """
    Generates a serial number that can be appended to filenames involving
    dynamic loads of URLs in order to make a URL that can be cached forever
    without fear of change.

    This will crawl the template files (using directories in TEMPLATE_DIRS),
    figuring out the latest timestamp, and store that value in
    settings.AJAX_SERIAL.
    """
    AJAX_SERIAL = getattr(settings, "AJAX_SERIAL", 0)

    if not AJAX_SERIAL:
        template_dirs = getattr(settings, "TEMPLATE_DIRS", ["."])

        for template_path in template_dirs:
            for root, dirs, files in os.walk(template_path):
                for name in files:
                    mtime = int(os.stat(os.path.join(root, name)).st_mtime)

                    if mtime > AJAX_SERIAL:
                        AJAX_SERIAL = mtime

        setattr(settings, "AJAX_SERIAL", AJAX_SERIAL)


def generate_cache_serials():
    """
    Wrapper around generate_media_serial and generate_ajax_serial to
    generate all serial numbers in one go.

    This should be called early in the startup, such as in the site's
    main urls.py.
    """
    generate_media_serial()
    generate_ajax_serial()
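
# Example usage (a sketch; the file name and MEDIA_URL value are assumptions):
# call this once at startup, e.g. near the top of the site's main urls.py,
# then append the serials when building media URLs so they can be cached
# "forever" yet change whenever the files do:
#
#   generate_cache_serials()
#   js_url = '%sjs/app.js?%s' % (settings.MEDIA_URL, settings.MEDIA_SERIAL)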