GAH! Add forgotten modules ... #88

gratipay · Jun 29, 2012 · b4a486d · b4a486d
1 parent 0a0896b
commit b4a486d
Show file tree

Hide file tree

Showing 2 changed files with 384 additions and 0 deletions.
diff --git a/gittip/crypto.py b/gittip/crypto.py
@@ -0,0 +1,209 @@
+"""
+Django's standard crypto functions and utilities.
+"""
+from __future__ import unicode_literals
+
+import hmac
+import struct
+import hashlib
+import binascii
+import operator
+import time
+from functools import reduce
+
+# Use the system PRNG if possible. Note that this rebinds the name `random`
+# from the stdlib module to a SystemRandom instance when that succeeds; on
+# failure `random` stays the module (Mersenne Twister). get_random_string()
+# reads `using_sysrandom` to decide whether it has to re-seed before use.
+import random
+try:
+    random = random.SystemRandom()
+    using_sysrandom = True
+except NotImplementedError:
+    import warnings
+    warnings.warn('A secure pseudo-random number generator is not available '
+                  'on your system. Falling back to Mersenne Twister.')
+    using_sysrandom = False
+
+# Stand-in for Django's settings.SECRET_KEY (the Django import is disabled).
+# NOTE(review): nothing visible in this module reads SECRET_KEY -- confirm
+# whether it is still needed.
+#from django.conf import settings
+SECRET_KEY = ""
+import string
+# 64 characters drawn at import time from digits, letters and punctuation.
+# Only used by get_random_string() as extra seed material when the system
+# PRNG is unavailable.
+pool = string.digits + string.letters + string.punctuation
+UNSECURE_RANDOM_STRING = "".join([random.choice(pool) for i in range(64)])
+
+
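+# I get wet. (That is, the opposite of DRY: the helpers below are copied from
+# Django rather than imported -- see the commented-out imports.)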
+
+#from django.utils.functional import Promise
+class Promise(object):
+    """
+    Marker base class for lazy "promise" proxies.
+
+    It carries no behaviour of its own; code such as smart_str() uses
+    isinstance(obj, Promise) to recognise a promise and force it to text.
+    """
+
+#from django.utils.encoding import smart_str
+def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
+    """
+    Coerce 's' to a bytestring in the requested 'encoding'.
+
+    With strings_only=True, None and int values are handed back untouched
+    instead of being converted. 'errors' is passed through to the
+    encode/decode calls.
+    """
+    if strings_only and (s is None or isinstance(s, int)):
+        return s
+
+    # Promises are forced to text first, then encoded.
+    if isinstance(s, Promise):
+        return unicode(s).encode(encoding, errors)
+
+    if isinstance(s, unicode):
+        return s.encode(encoding, errors)
+
+    if isinstance(s, basestring):
+        # A bytestring, taken to be UTF-8: transcode only when a different
+        # target encoding was asked for and there is something to transcode.
+        if s and encoding != 'utf-8':
+            return s.decode('utf-8', errors).encode(encoding, errors)
+        return s
+
+    # Anything else goes through str(), with a unicode fallback.
+    try:
+        return str(s)
+    except UnicodeEncodeError:
+        if not isinstance(s, Exception):
+            return unicode(s).encode(encoding, errors)
+        # An Exception subclass holding non-ASCII data that cannot print
+        # itself. Convert its arguments one by one rather than raising a
+        # further exception.
+        parts = [smart_str(arg, encoding, strings_only, errors)
+                 for arg in s]
+        return ' '.join(parts)
+
+
+# 256-entry translation tables that XOR every byte value with 0x5C / 0x36.
+# _fast_hmac() applies them to the padded key via str.translate() to build
+# the outer (0x5C) and inner (0x36) HMAC key pads.
+_trans_5c = b"".join([chr(x ^ 0x5C) for x in xrange(256)])
+_trans_36 = b"".join([chr(x ^ 0x36) for x in xrange(256)])
+
+
+def salted_hmac(key_salt, value, secret=None):
+    """
+    Returns the HMAC-SHA1 of 'value', using a key generated from key_salt and
+    'secret'.
+
+    Unlike the Django original, there is no settings.SECRET_KEY to fall back
+    on here, so 'secret' must be passed explicitly; leaving it as None raises
+    NotImplementedError.
+
+    A different key_salt should be passed in for every application of HMAC.
+
+    The return value is the hmac object itself; call .digest() or
+    .hexdigest() on it to get the MAC.
+    """
+    if secret is None:
+        raise NotImplementedError(
+            'salted_hmac has no default secret; pass one explicitly')
+
+    # We need to generate a derived key from our base key.  We can do this by
+    # passing the key_salt and our base key through a pseudo-random function and
+    # SHA1 works nicely.
+    key = hashlib.sha1((key_salt + secret).encode('utf-8')).digest()
+
+    # If len(key_salt + secret) > sha_constructor().block_size, the above
+    # line is redundant and could be replaced by key = key_salt + secret, since
+    # the hmac module does the same thing for keys longer than the block size.
+    # However, we need to ensure that we *always* do this.
+    return hmac.new(key, msg=value, digestmod=hashlib.sha1)
+
+
+def get_random_string(length=12,
+                      allowed_chars='abcdefghijklmnopqrstuvwxyz'
+                                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'):
+    """
+    Build a random string of 'length' characters taken from 'allowed_chars'.
+
+    With the defaults (12 characters over a-z, A-Z, 0-9) the result carries
+    about 71 bits: log_2((26+26+10)^12) =~ 71.
+    """
+    if not using_sysrandom:
+        # No system PRNG: re-seed the fallback generator on every call from
+        # its own state, the current time and a per-process random string.
+        # This is a hack and may alter the statistical properties of the
+        # sequence slightly, but it beats being outright predictable.
+        seed_material = "%s%s%s" % (
+            random.getstate(),
+            time.time(),
+            UNSECURE_RANDOM_STRING)
+        random.seed(hashlib.sha256(seed_material).digest())
+    picks = [random.choice(allowed_chars) for _ in range(length)]
+    return ''.join(picks)
+
+
+def constant_time_compare(val1, val2):
+    """
+    Returns True if the two strings are equal, False otherwise.
+
+    The time taken is independent of the number of characters that match.
+    Strings of different length are rejected immediately, so the lengths
+    themselves are not hidden.
+    """
+    if len(val1) != len(val2):
+        return False
+    # OR together the XOR of every character pair instead of returning at the
+    # first mismatch, so the loop always walks the full length.
+    result = 0
+    for x, y in zip(val1, val2):
+        result |= ord(x) ^ ord(y)
+    return result == 0
+
+
+def _bin_to_long(x):
+    """
+    Interpret the binary string 'x' as one big-endian long integer.
+
+    Working on longs lets pbkdf2 XOR whole digests in a single operation.
+    """
+    hex_digits = x.encode('hex')
+    return long(hex_digits, 16)
+
+
+def _long_to_bin(x, hex_format_string):
+    """
+    Turn the long integer 'x' back into a binary string.
+
+    hex_format_string fixes the zero-padded width of the hex form, e.g.
+    "%020x" yields 20 hex digits, i.e. a 10-byte result.
+    """
+    hex_digits = hex_format_string % x
+    return binascii.unhexlify(hex_digits.encode('ascii'))
+
+
+def _fast_hmac(key, msg, digest):
+    """
+    A trimmed down version of Python's HMAC implementation
+
+    'digest' is a hash constructor (pbkdf2 passes hashlib.sha256 by default).
+    Returns the outer hash object, not the digest bytes; callers call
+    .digest() on the result.
+    """
+    # dig1 is the inner hash, dig2 the outer one.
+    dig1, dig2 = digest(), digest()
+    key = smart_str(key)
+    # Keys longer than one block are replaced by their digest ...
+    if len(key) > dig1.block_size:
+        key = digest(key).digest()
+    # ... and the key is then padded with NUL bytes up to the block size.
+    key += chr(0) * (dig1.block_size - len(key))
+    # inner = H((key XOR 0x36) + msg)
+    dig1.update(key.translate(_trans_36))
+    dig1.update(msg)
+    # outer = H((key XOR 0x5C) + inner digest)
+    dig2.update(key.translate(_trans_5c))
+    dig2.update(dig1.digest())
+    return dig2
+
+
+def pbkdf2(password, salt, iterations, dklen=0, digest=None):
+    """
+    Implements PBKDF2 as defined in RFC 2898, section 5.2
+
+    HMAC+SHA256 is used as the default pseudo random function.
+
+    Right now 10,000 iterations is the recommended default which takes
+    100ms on a 2.2Ghz Core 2 Duo.  This is probably the bare minimum
+    for security given 1000 iterations was recommended in 2001. This
+    code is very well optimized for CPython and is only four times
+    slower than openssl's implementation.
+
+    password and salt are coerced to bytestrings with smart_str.
+    iterations must be greater than zero (checked with an assert).
+    dklen is the length of the derived key in bytes; 0 means "use the
+    digest size". digest is a hash constructor, defaulting to
+    hashlib.sha256. Raises OverflowError if dklen exceeds
+    (2**32 - 1) * digest size. Returns the derived key as a bytestring of
+    dklen bytes.
+    """
+    assert iterations > 0
+    if not digest:
+        digest = hashlib.sha256
+    password = smart_str(password)
+    salt = smart_str(salt)
+    hlen = digest().digest_size
+    if not dklen:
+        dklen = hlen
+    if dklen > (2 ** 32 - 1) * hlen:
+        raise OverflowError('dklen too big')
+    # l: number of hlen-sized blocks needed, i.e. ceil(dklen / hlen).
+    l = -(-dklen // hlen)
+    # r: number of bytes taken from the last block.
+    r = dklen - (l - 1) * hlen
+
+    # Zero-padded hex format wide enough for one full digest (2 hex digits
+    # per byte), so leading zero bytes survive the long -> binary conversion.
+    hex_format_string = "%%0%ix" % (hlen * 2)
+
+    def F(i):
+        # Block i: XOR together all 'iterations' chained HMAC outputs.
+        def U():
+            # The chain starts from salt + the big-endian 32-bit block index.
+            u = salt + struct.pack(b'>I', i)
+            for j in xrange(int(iterations)):
+                u = _fast_hmac(password, u, digest).digest()
+                yield _bin_to_long(u)
+        return _long_to_bin(reduce(operator.xor, U()), hex_format_string)
+
+    # Blocks are numbered from 1; the last one is truncated to r bytes.
+    T = [F(x) for x in range(1, l + 1)]
+    return b''.join(T[:-1]) + T[-1][:r]
diff --git a/gittip/csrf.py b/gittip/csrf.py
@@ -0,0 +1,175 @@
+"""Cross Site Request Forgery middleware, borrowed from Django.
+
+See also:
+
+    https://github.com/django/django/blob/master/django/middleware/csrf.py
+    https://docs.djangoproject.com/en/dev/ref/contrib/csrf/
+    https://github.com/whit537/www.gittip.com/issues/88
+
+"""
+import rfc822
+import re
+import time
+import urlparse
+
+
+#from django.utils.cache import patch_vary_headers
+cc_delim_re = re.compile(r'\s*,\s*')
+def patch_vary_headers(response, newheaders):
+    """
+    Make sure every name in 'newheaders' is listed in the response's "Vary"
+    header, creating the header if necessary.
+
+    Names already present (compared case-insensitively) are not repeated and
+    nothing is ever removed from an existing "Vary" header.
+    """
+    # The original order is preserved on purpose: cache implementations may
+    # rely on the order of the Vary contents in, say, computing an MD5 hash.
+    vary_headers = []
+    if 'Vary' in response.headers:
+        vary_headers = cc_delim_re.split(response.headers['Vary'])
+    # Header names are case-insensitive, so compare lower-cased.
+    seen = set(name.lower() for name in vary_headers)
+    missing = [name for name in newheaders if name.lower() not in seen]
+    response.headers['Vary'] = ', '.join(vary_headers + missing)
+
+
+#from django.utils.http import same_origin
+def same_origin(url1, url2):
+    """
+    Tell whether two URLs share an origin, i.e. have the same scheme,
+    hostname and port.
+    """
+    def origin(url):
+        parts = urlparse.urlparse(url)
+        return (parts.scheme, parts.hostname, parts.port)
+
+    return origin(url1) == origin(url2)
+
+
+from aspen import Response
+from crypto import constant_time_compare, get_random_string
+
+# Bodies of the 403 Responses raised by inbound(). REASON_BAD_REFERER is
+# formatted with (actual referer, expected referer).
+REASON_NO_REFERER = "Referer checking failed - no Referer."
+REASON_BAD_REFERER = "Referer checking failed - %s does not match %s."
+REASON_NO_CSRF_COOKIE = "CSRF cookie not set."
+REASON_BAD_TOKEN = "CSRF token missing or incorrect."
+
+# Length of newly generated tokens; _sanitize_token() also discards any
+# cookie value longer than this.
+TOKEN_LENGTH = 32
+# Lifetime of the csrf_token cookie in seconds (52 weeks).
+TIMEOUT = 60 * 60 * 24 * 7 * 52
+
+
+def _get_new_csrf_key():
+    """Generate a fresh random CSRF token of TOKEN_LENGTH characters."""
+    return get_random_string(length=TOKEN_LENGTH)
+
+
+def _sanitize_token(token):
+    """
+    Reduce a cookie value to a plain alphanumeric 'str' token.
+
+    A value longer than TOKEN_LENGTH, or one with nothing left after
+    stripping non-alphanumeric characters, is replaced by a new key.
+    """
+    if len(token) > TOKEN_LENGTH:
+        return _get_new_csrf_key()
+    # Drop non-ASCII, then anything that is not a letter or digit; str() so
+    # that the post processing middleware always sees a 'str'.
+    ascii_token = str(token.decode('ascii', 'ignore'))
+    cleaned = re.sub('[^a-zA-Z0-9]+', '', ascii_token)
+    if cleaned != "":
+        return cleaned
+    # The cookie has been truncated to nothing at some point.
+    return _get_new_csrf_key()
+
+def _is_secure(request):
+    """Tell whether the site's canonical scheme is https.
+
+    The answer comes from gittip.canonical_scheme; 'request' is not
+    inspected.
+    """
+    import gittip
+    scheme = gittip.canonical_scheme
+    return scheme == 'https'
+
+def _get_host(request):
+    """Work out the HTTP host from the request headers.
+
+    X-Forwarded-Host wins when present, otherwise Host is used. The Host
+    header is looked up unconditionally, so a request without one raises
+    KeyError.
+    """
+    fallback = request.headers['Host']
+    return request.headers.get('X-Forwarded-Host', fallback)
+
+
+
+def inbound(request):
+    """Given a Request object, reject it if it's a forgery.
+
+    The token to use for this request is stored in
+    request.context['csrf_token']: the sanitized value of the csrf_token
+    cookie, or a freshly generated key when the cookie is absent.
+
+    For any method other than GET, HEAD, OPTIONS and TRACE a Response(403)
+    is raised when:
+
+      - the site is served over https and the Referer header is missing or
+        is not same-origin with this host;
+      - the request carries no csrf_token cookie; or
+      - the token from the POST body (or, failing that, the X-CSRF-TOKEN
+        header) does not match the cookie.
+    """
+
+    cookie = request.headers.cookie.get('csrf_token')
+    if cookie is None:
+        # No CSRF cookie on this request. Remember that (csrf_token stays
+        # None so unsafe methods are rejected below) and generate a token
+        # and store it in the request, so it's available to the view.
+        csrf_token = None
+        request.context['csrf_token'] = _get_new_csrf_key()
+    else:
+        csrf_token = _sanitize_token(cookie.value)
+        # Use same token next time
+        request.context['csrf_token'] = csrf_token
+
+    # Assume that anything not defined as 'safe' by RFC 2616 needs protection
+    if request.line.method not in ('GET', 'HEAD', 'OPTIONS', 'TRACE'):
+
+        if _is_secure(request):
+            # Suppose user visits http://example.com/
+            # An active network attacker (man-in-the-middle, MITM) sends a
+            # POST form that targets https://example.com/detonate-bomb/ and
+            # submits it via JavaScript.
+            #
+            # The attacker will need to provide a CSRF cookie and token, but
+            # that's no problem for a MITM and the session-independent
+            # nonce we're using. So the MITM can circumvent the CSRF
+            # protection. This is true for any HTTP connection, but anyone
+            # using HTTPS expects better! For this reason, for
+            # https://example.com/ we need additional protection that treats
+            # http://example.com/ as completely untrusted. Under HTTPS,
+            # Barth et al. found that the Referer header is missing for
+            # same-domain requests in only about 0.2% of cases or less, so
+            # we can use strict Referer checking.
+            #
+            # Headers are looked up by their HTTP name here ('Referer'), the
+            # same way 'Host' and 'X-CSRF-TOKEN' are; 'HTTP_REFERER' is the
+            # WSGI environ spelling and is never present as a header.
+            referer = request.headers.get('Referer')
+            if referer is None:
+                raise Response(403, REASON_NO_REFERER)
+
+            # Note that get_host() includes the port.
+            good_referer = 'https://%s/' % _get_host(request)
+            if not same_origin(referer, good_referer):
+                reason = REASON_BAD_REFERER % (referer, good_referer)
+                raise Response(403, reason)
+
+        if csrf_token is None:
+            # No CSRF cookie. For POST requests, we insist on a CSRF cookie,
+            # and in this way we can avoid all CSRF attacks, including login
+            # CSRF.
+            raise Response(403, REASON_NO_CSRF_COOKIE)
+
+        # Check non-cookie token for match.
+        request_csrf_token = ""
+        if request.line.method == "POST":
+            request_csrf_token = request.body.get('csrf_token', '')
+
+        if request_csrf_token == "":
+            # Fall back to X-CSRF-TOKEN, to make things easier for AJAX,
+            # and possible for PUT/DELETE.
+            request_csrf_token = request.headers.get('X-CSRF-TOKEN', '')
+
+        if not constant_time_compare(request_csrf_token, csrf_token):
+            raise Response(403, REASON_BAD_TOKEN)
+
+
+def outbound(response):
+    """Refresh the csrf_token cookie on the outgoing response.
+
+    When no token was stored on the request context the response is
+    returned untouched. Otherwise the cookie and the Vary header are set on
+    the response in place and nothing is returned.
+    """
+    token = response.request.context.get('csrf_token')
+    if token is None:
+        # inbound was never called, probably because another inbound hook
+        # short-circuited, so there is no token to send back.
+        return response
+
+    # Send the cookie even if the browser already has it, so that the expiry
+    # timer is renewed.
+    response.headers.cookie['csrf_token'] = token
+    morsel = response.headers.cookie['csrf_token']
+
+    # The domain is deliberately left unset: it is supposed to default to
+    # what we want, the domain of the object requested. httponly is not set
+    # either, because JavaScript needs access to this cookie.
+    morsel['path'] = '/'
+    morsel['expires'] = rfc822.formatdate(time.time() + TIMEOUT)
+
+    # Content varies with the CSRF cookie, so set the Vary header.
+    patch_vary_headers(response, ('Cookie',))