From b4a486df1fa6825c9687a304a673fc9f13c7f4dd Mon Sep 17 00:00:00 2001
From: Chad Whitacre
Date: Fri, 29 Jun 2012 12:12:58 -0400
Subject: [PATCH] GAH! Add forgotten modules ... #88

---
 gittip/crypto.py | 209 +++++++++++++++++++++++++++++++++++++++++++++++
 gittip/csrf.py   | 175 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 384 insertions(+)
 create mode 100644 gittip/crypto.py
 create mode 100644 gittip/csrf.py

diff --git a/gittip/crypto.py b/gittip/crypto.py
new file mode 100644
index 0000000000..d71b6a9124
--- /dev/null
+++ b/gittip/crypto.py
@@ -0,0 +1,209 @@
+"""
+Django's standard crypto functions and utilities.
+"""
+from __future__ import unicode_literals
+
+import hmac
+import struct
+import hashlib
+import binascii
+import operator
+import time
+from functools import reduce
+
+# Use the system PRNG if possible
+import random
+try:
+    random = random.SystemRandom()
+    using_sysrandom = True
+except NotImplementedError:
+    import warnings
+    warnings.warn('A secure pseudo-random number generator is not available '
+                  'on your system. Falling back to Mersenne Twister.')
+    using_sysrandom = False
+
+#from django.conf import settings
+SECRET_KEY = ""
+import string
+pool = string.digits + string.ascii_letters + string.punctuation
+UNSECURE_RANDOM_STRING = "".join([random.choice(pool) for i in range(64)])
+
+
+# I get wet.
+
+#from django.utils.functional import Promise
+class Promise(object):
+    """
+    This is just a base class for the proxy class created in
+    the closure of the lazy function. It can be used to recognize
+    promises in code.
+    """
+    pass
+
+#from django.utils.encoding import smart_str
+def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
+    """
+    Returns a bytestring version of 's', encoded as specified in 'encoding'.
+
+    If strings_only is True, don't convert (some) non-string-like objects.
+    """
+    if strings_only and (s is None or isinstance(s, int)):
+        return s
+    if isinstance(s, Promise):
+        return unicode(s).encode(encoding, errors)
+    elif not isinstance(s, basestring):
+        try:
+            return str(s)
+        except UnicodeEncodeError:
+            if isinstance(s, Exception):
+                # An Exception subclass containing non-ASCII data that doesn't
+                # know how to print itself properly. We shouldn't raise a
+                # further exception.
+                return ' '.join([smart_str(arg, encoding, strings_only,
+                        errors) for arg in s])
+            return unicode(s).encode(encoding, errors)
+    elif isinstance(s, unicode):
+        return s.encode(encoding, errors)
+    elif s and encoding != 'utf-8':
+        return s.decode('utf-8', errors).encode(encoding, errors)
+    else:
+        return s
+
+
+_trans_5c = b"".join([chr(x ^ 0x5C) for x in xrange(256)])
+_trans_36 = b"".join([chr(x ^ 0x36) for x in xrange(256)])
+
+
+def salted_hmac(key_salt, value, secret=None):
+    """
+    Returns the HMAC-SHA1 of 'value', using a key generated from key_salt and a
+    secret (required here; NotImplementedError is raised when it is omitted).
+
+    A different key_salt should be passed in for every application of HMAC.
+    """
+    if secret is None:
+        # No settings module is wired in here, so there is no default secret.
+        raise NotImplementedError
+
+    # We need to generate a derived key from our base key. We can do this by
+    # passing the key_salt and our base key through a pseudo-random function and
+    # SHA1 works nicely.
+    key = hashlib.sha1((key_salt + secret).encode('utf-8')).digest()
+
+    # If len(key_salt + secret) > sha_constructor().block_size, the above
+    # line is redundant and could be replaced by key = key_salt + secret, since
+    # the hmac module does the same thing for keys longer than the block size.
+    # However, we need to ensure that we *always* do this.
+    return hmac.new(key, msg=value, digestmod=hashlib.sha1)
+
+
+def get_random_string(length=12,
+                      allowed_chars='abcdefghijklmnopqrstuvwxyz'
+                                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'):
+    """
+    Returns a securely generated random string.
+
+    The default length of 12 with the a-z, A-Z, 0-9 character set returns
+    a 71-bit value. log_2((26+26+10)^12) =~ 71 bits
+    """
+    if not using_sysrandom:
+        # This is ugly, and a hack, but it makes things better than
+        # the alternative of predictability. This re-seeds the PRNG
+        # using a value that is hard for an attacker to predict, every
+        # time a random string is required. This may change the
+        # properties of the chosen random sequence slightly, but this
+        # is better than absolute predictability.
+        random.seed(
+            hashlib.sha256(
+                "%s%s%s" % (
+                    random.getstate(),
+                    time.time(),
+                    UNSECURE_RANDOM_STRING)
+                ).digest())
+    return ''.join([random.choice(allowed_chars) for i in range(length)])
+
+
+def constant_time_compare(val1, val2):
+    """
+    Returns True if the two strings are equal, False otherwise.
+
+    The time taken is independent of the number of characters that match.
+    """
+    if len(val1) != len(val2):
+        return False
+    result = 0
+    for x, y in zip(val1, val2):
+        result |= ord(x) ^ ord(y)
+    return result == 0
+
+
+def _bin_to_long(x):
+    """
+    Convert a binary string into a long integer
+
+    This is a clever optimization for fast xor vector math
+    """
+    return long(x.encode('hex'), 16)
+
+
+def _long_to_bin(x, hex_format_string):
+    """
+    Convert a long integer into a binary string.
+    hex_format_string is like "%020x" for padding 10 characters.
+    """
+    return binascii.unhexlify((hex_format_string % x).encode('ascii'))
+
+
+def _fast_hmac(key, msg, digest):
+    """
+    A trimmed down version of Python's HMAC implementation
+    """
+    dig1, dig2 = digest(), digest()
+    key = smart_str(key)
+    if len(key) > dig1.block_size:
+        key = digest(key).digest()
+    key += chr(0) * (dig1.block_size - len(key))
+    dig1.update(key.translate(_trans_36))
+    dig1.update(msg)
+    dig2.update(key.translate(_trans_5c))
+    dig2.update(dig1.digest())
+    return dig2
+
+
+def pbkdf2(password, salt, iterations, dklen=0, digest=None):
+    """
+    Implements PBKDF2 as defined in RFC 2898, section 5.2
+
+    HMAC+SHA256 is used as the default pseudo random function.
+
+    Right now 10,000 iterations is the recommended default which takes
+    100ms on a 2.2Ghz Core 2 Duo. This is probably the bare minimum
+    for security given 1000 iterations was recommended in 2001. This
+    code is very well optimized for CPython and is only four times
+    slower than openssl's implementation.
+    """
+    assert iterations > 0
+    if not digest:
+        digest = hashlib.sha256
+    password = smart_str(password)
+    salt = smart_str(salt)
+    hlen = digest().digest_size
+    if not dklen:
+        dklen = hlen
+    if dklen > (2 ** 32 - 1) * hlen:
+        raise OverflowError('dklen too big')
+    l = -(-dklen // hlen)
+    r = dklen - (l - 1) * hlen
+
+    hex_format_string = "%%0%ix" % (hlen * 2)
+
+    def F(i):
+        def U():
+            u = salt + struct.pack(b'>I', i)
+            for j in xrange(int(iterations)):
+                u = _fast_hmac(password, u, digest).digest()
+                yield _bin_to_long(u)
+        return _long_to_bin(reduce(operator.xor, U()), hex_format_string)
+
+    T = [F(x) for x in range(1, l + 1)]
+    return b''.join(T[:-1]) + T[-1][:r]
diff --git a/gittip/csrf.py b/gittip/csrf.py
new file mode 100644
index 0000000000..adea9bb72c
--- /dev/null
+++ b/gittip/csrf.py
@@ -0,0 +1,175 @@
+"""Cross Site Request Forgery middleware, borrowed from Django.
+
+See also:
+
+    https://github.com/django/django/blob/master/django/middleware/csrf.py
+    https://docs.djangoproject.com/en/dev/ref/contrib/csrf/
+    https://github.com/whit537/www.gittip.com/issues/88
+
+"""
+import rfc822
+import re
+import time
+import urlparse
+
+
+#from django.utils.cache import patch_vary_headers
+cc_delim_re = re.compile(r'\s*,\s*')
+def patch_vary_headers(response, newheaders):
+    """
+    Adds (or updates) the "Vary" header in the given HttpResponse object.
+    newheaders is a list of header names that should be in "Vary". Existing
+    headers in "Vary" aren't removed.
+    """
+    # Note that we need to keep the original order intact, because cache
+    # implementations may rely on the order of the Vary contents in, say,
+    # computing an MD5 hash.
+    if 'Vary' in response.headers:
+        vary_headers = cc_delim_re.split(response.headers['Vary'])
+    else:
+        vary_headers = []
+    # Use .lower() here so we treat headers as case-insensitive.
+    existing_headers = set([header.lower() for header in vary_headers])
+    additional_headers = [newheader for newheader in newheaders
+                          if newheader.lower() not in existing_headers]
+    response.headers['Vary'] = ', '.join(vary_headers + additional_headers)
+
+
+#from django.utils.http import same_origin
+def same_origin(url1, url2):
+    """
+    Checks if two URLs are 'same-origin'
+    """
+    p1, p2 = urlparse.urlparse(url1), urlparse.urlparse(url2)
+    return (p1.scheme, p1.hostname, p1.port) == (p2.scheme, p2.hostname, p2.port)
+
+
+from aspen import Response
+from crypto import constant_time_compare, get_random_string
+
+REASON_NO_REFERER = "Referer checking failed - no Referer."
+REASON_BAD_REFERER = "Referer checking failed - %s does not match %s."
+REASON_NO_CSRF_COOKIE = "CSRF cookie not set."
+REASON_BAD_TOKEN = "CSRF token missing or incorrect."
+
+TOKEN_LENGTH = 32
+TIMEOUT = 60 * 60 * 24 * 7 * 52
+
+
+def _get_new_csrf_key():
+    return get_random_string(TOKEN_LENGTH)
+
+
+def _sanitize_token(token):
+    # Allow only alphanum, and ensure we return a 'str' for the sake
+    # of the post processing middleware.
+    if len(token) > TOKEN_LENGTH:
+        return _get_new_csrf_key()
+    token = re.sub('[^a-zA-Z0-9]+', '', str(token.decode('ascii', 'ignore')))
+    if token == "":
+        # In case the cookie has been truncated to nothing at some point.
+        return _get_new_csrf_key()
+    return token
+
+def _is_secure(request):
+    """Return True if gittip.canonical_scheme is 'https' (request is unused)."""
+    import gittip
+    return gittip.canonical_scheme == 'https'
+
+def _get_host(request):
+    """Returns the HTTP host using the request headers.
+    """
+    return request.headers.get('X-Forwarded-Host', request.headers['Host'])
+
+
+def inbound(request):
+    """Given a Request object, reject it if it's a forgery.
+    """
+
+    # A missing cookie shows up as None from .get(), not as a KeyError.
+    cookie = request.headers.cookie.get('csrf_token')
+    if cookie is None:
+        csrf_token = None
+        # Generate token and store it in the request, so it's
+        # available to the view.
+        request.context['csrf_token'] = _get_new_csrf_key()
+    else:
+        csrf_token = _sanitize_token(cookie.value)
+        # Use same token next time
+        request.context['csrf_token'] = csrf_token
+
+    # Assume that anything not defined as 'safe' by RFC 2616 needs protection
+    if request.line.method not in ('GET', 'HEAD', 'OPTIONS', 'TRACE'):
+
+        if _is_secure(request):
+            # Suppose user visits http://example.com/
+            # An active network attacker (man-in-the-middle, MITM) sends a
+            # POST form that targets https://example.com/detonate-bomb/ and
+            # submits it via JavaScript.
+            #
+            # The attacker will need to provide a CSRF cookie and token, but
+            # that's no problem for a MITM and the session-independent
+            # nonce we're using. So the MITM can circumvent the CSRF
+            # protection. This is true for any HTTP connection, but anyone
+            # using HTTPS expects better! For this reason, for
+            # https://example.com/ we need additional protection that treats
+            # http://example.com/ as completely untrusted. Under HTTPS,
+            # Barth et al. found that the Referer header is missing for
+            # same-domain requests in only about 0.2% of cases or less, so
+            # we can use strict Referer checking.
+            referer = request.headers.get('Referer')
+            if referer is None:
+                raise Response(403, REASON_NO_REFERER)
+
+            # Note that get_host() includes the port.
+            good_referer = 'https://%s/' % _get_host(request)
+            if not same_origin(referer, good_referer):
+                reason = REASON_BAD_REFERER % (referer, good_referer)
+                raise Response(403, reason)
+
+        if csrf_token is None:
+            # No CSRF cookie. For POST requests, we insist on a CSRF cookie,
+            # and in this way we can avoid all CSRF attacks, including login
+            # CSRF.
+            raise Response(403, REASON_NO_CSRF_COOKIE)
+
+        # Check non-cookie token for match.
+        request_csrf_token = ""
+        if request.line.method == "POST":
+            request_csrf_token = request.body.get('csrf_token', '')
+
+        if request_csrf_token == "":
+            # Fall back to X-CSRF-TOKEN, to make things easier for AJAX,
+            # and possible for PUT/DELETE.
+            request_csrf_token = request.headers.get('X-CSRF-TOKEN', '')
+
+        if not constant_time_compare(request_csrf_token, csrf_token):
+            raise Response(403, REASON_BAD_TOKEN)
+
+
+def outbound(response):
+    """Set the CSRF cookie on the response and add Cookie to its Vary header.
+
+    A response whose request context holds no csrf_token is returned
+    untouched.
+    """
+    csrf_token = response.request.context.get('csrf_token')
+
+    # If csrf_token is unset, then inbound was never called, probably because
+    # another inbound hook short-circuited.
+    if csrf_token is None:
+        return response
+
+    # Set the CSRF cookie even if it's already set, so we renew
+    # the expiry timer.
+    response.headers.cookie['csrf_token'] = csrf_token
+    cookie = response.headers.cookie['csrf_token']
+    # I am not setting domain, because it is supposed to default to what we
+    # want: the domain of the object requested.
+    #cookie['domain']
+    cookie['path'] = '/'
+    cookie['expires'] = rfc822.formatdate(time.time() + TIMEOUT)
+    #cookie['httponly'] = "Yes, please." Want js access for this.
+
+    # Content varies with the CSRF cookie, so set the Vary header.
+    patch_vary_headers(response, ('Cookie',))