diff --git a/bin/rekey.py b/bin/rekey.py index 61ceb0b4cd..a1c20dc1de 100755 --- a/bin/rekey.py +++ b/bin/rekey.py @@ -1,10 +1,14 @@ #!/usr/bin/env python2 +"""See gratipay.models.participant.mixins.identity.rekey for documentation. +""" from __future__ import absolute_import, division, print_function, unicode_literals from gratipay import wireup +from gratipay.models.participant.mixins import identity as participant_identities env = wireup.env() db = wireup.db(env) -wireup.crypto(env) +packer = wireup.crypto(env) -print("{} record(s) rekeyed.".format(0)) # stubbed until we have something to rekey +n = participant_identities.rekey(db, packer) +print("Rekeyed {} participant identity record(s).".format(n)) diff --git a/gratipay/models/country.py b/gratipay/models/country.py index 6b6dc5e668..080605d7a2 100644 --- a/gratipay/models/country.py +++ b/gratipay/models/country.py @@ -7,12 +7,9 @@ class Country(Model): """Represent country records from our database (read-only). :var int id: the record's primary key in our ``countries`` table - :var unicode name: the name of the country - :var unicode code2: the country's `ISO 3166-1 alpha-2`_ code - :var unicode code3: the country's `ISO 3166-1 alpha-3`_ code + :var unicode code: the country's `ISO 3166-1 alpha-2`_ code .. _ISO 3166-1 alpha-2 : https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2 - .. 
_ISO 3166-1 alpha-3 : https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3 """ typname = 'countries' diff --git a/gratipay/models/participant/__init__.py b/gratipay/models/participant/__init__.py index 440a1a9a03..4bd8a127db 100644 --- a/gratipay/models/participant/__init__.py +++ b/gratipay/models/participant/__init__.py @@ -38,6 +38,7 @@ from gratipay.models.account_elsewhere import AccountElsewhere from gratipay.models.exchange_route import ExchangeRoute from gratipay.models.team import Team +from gratipay.models.participant import mixins from gratipay.security.crypto import constant_time_compare from gratipay.utils import ( i18n, @@ -60,7 +61,7 @@ USERNAME_MAX_SIZE = 32 -class Participant(Model): +class Participant(Model, mixins.Identity): """Represent a Gratipay participant. """ @@ -355,6 +356,7 @@ def clear_personal_information(self, cursor): DELETE FROM emails WHERE participant_id = %(participant_id)s; DELETE FROM statements WHERE participant=%(participant_id)s; + DELETE FROM participant_identities WHERE participant_id=%(participant_id)s; UPDATE participants SET anonymous_giving=False @@ -1437,6 +1439,15 @@ def take_over(self, account, have_confirmation=False): return + # Hard fail if the other participant has an identity. + # =================================================== + # Our identity system is very young. Maybe some day we'll do + # something smarter here. + + if other.list_identity_metadata(): + raise WontTakeOverWithIdentities() + + # Make sure we have user confirmation if needed. 
# ============================================== # We need confirmation in whatever combination of the following @@ -1651,3 +1662,5 @@ class LastElsewhere(Exception): pass class NonexistingElsewhere(Exception): pass class TeamCantBeOnlyAuth(Exception): pass + +class WontTakeOverWithIdentities(Exception): pass diff --git a/gratipay/models/participant/mixins/__init__.py b/gratipay/models/participant/mixins/__init__.py new file mode 100644 index 0000000000..b05ab6ff13 --- /dev/null +++ b/gratipay/models/participant/mixins/__init__.py @@ -0,0 +1,3 @@ +from .identity import IdentityMixin as Identity + +__all__ = ['Identity'] diff --git a/gratipay/models/participant/mixins/identity.py b/gratipay/models/participant/mixins/identity.py new file mode 100644 index 0000000000..7fd0e7a2cf --- /dev/null +++ b/gratipay/models/participant/mixins/identity.py @@ -0,0 +1,217 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +from psycopg2 import IntegrityError +from gratipay.models import add_event + + +class ParticipantIdentityError(StandardError): pass +class ParticipantIdentitySchemaUnknown(ParticipantIdentityError): pass +class ParticipantIdentityInfoInvalid(ParticipantIdentityError): pass + + +schema_validators = {'nothing-enforced': lambda info: None} + + +def _validate_info(schema_name, info): + if schema_name not in schema_validators: + raise ParticipantIdentitySchemaUnknown("unknown schema '{}'".format(schema_name)) + validate_schema = schema_validators[schema_name] + validate_schema(info) + return None + + +class IdentityMixin(object): + """This mixin provides management of national identities for + :py:class:`~gratipay.models.participant.Participant` objects. + + A participant may have zero or more national identities on file with + Gratipay, with at most one for any given country at any given time. When at + least one of a participant's national identities has been verified, then + they may join the payroll of one or more Teams. 
+ + Since national identity information is more sensitive than other + information in our database, we encrypt it in the application layer before + passing it to the database in :py:meth:`store_identity_info`. We then limit + access to the information to a single method, + :py:meth:`retrieve_identity_info`. + + """ + + def store_identity_info(self, country_id, schema_name, info): + """Store the participant's national identity information for a given country. + + :param int country_id: an ``id`` from the ``countries`` table + :param unicode schema_name: the name of the schema of the identity information + :param dict info: a dictionary of identity information + + :returns: the ``id`` of the identity info's record in the + ``participant_identities`` table + + :raises ParticipantIdentitySchemaUnknown: if ``schema_name`` doesn't + name a known schema + :raises ParticipantIdentityInfoInvalid: if the ``info`` dictionary does + not conform to the schema named by ``schema_name`` + + The ``info`` dictionary will be serialized to JSON and then encrypted + with :py:class:`~gratipay.security.crypto.EncryptingPacker` before + being sent to the database. We anticipate multiple schemas evolving for + this dictionary, with enforcement in the application layer (since the + field is opaque in the database layer). For now there is only one + available schema: ``nothing-enforced``. 
+ + """ + _validate_info(schema_name, info) + info = self.encrypting_packer.pack(info) + + def _add_event(action): + payload = dict( id=self.id + , country_id=country_id + , identity_id=identity_id + , action=action + ' identity' + ) + add_event(cursor, 'participant', payload) + + params = dict( participant_id=self.id + , country_id=country_id + , info=info + , schema_name=schema_name + ) + + try: + with self.db.get_cursor() as cursor: + identity_id = cursor.one(""" + + INSERT INTO participant_identities + (participant_id, country_id, schema_name, info) + VALUES (%(participant_id)s, %(country_id)s, %(schema_name)s, %(info)s) + RETURNING id + + """, params) + _add_event('insert') + + except IntegrityError as exc: + if exc.pgcode != '23505': + raise + with self.db.get_cursor() as cursor: + identity_id, old_schema_name = cursor.one(""" + + UPDATE participant_identities + SET schema_name=%(schema_name)s, info=%(info)s + WHERE participant_id=%(participant_id)s + AND country_id=%(country_id)s + RETURNING id, schema_name + + """, params) + _add_event('update') + + return identity_id + + + def retrieve_identity_info(self, country_id): + """Return the participant's national identity information for a given country. 
+ + :param int country_id: an ``id`` from the ``countries`` table + + :returns: a dictionary of identity information, or ``None`` + + """ + with self.db.get_cursor() as cursor: + identity_id, info = cursor.one(""" + + SELECT id, info + FROM participant_identities + WHERE participant_id=%s + AND country_id=%s + + """, (self.id, country_id), default=(None, None)) + + if info is not None: + info = bytes(info) # psycopg2 returns bytea as buffer; we want bytes + info = self.encrypting_packer.unpack(info) + + payload = dict( id=self.id + , identity_id=identity_id + , country_id=country_id + , action='retrieve identity' + ) + + add_event(cursor, 'participant', payload) + + return info + + + def list_identity_metadata(self): + """Return a list of identity metadata records, sorted by country code. + + Identity metadata records have the following attributes: + + :var int id: the record's primary key in the ``participant_identities`` table + :var Country country: the country this identity applies to + :var unicode schema_name: the name of the schema that the data itself conforms to + + The national identity information itself is not included, only + metadata. Use :py:meth:`retrieve_identity_info` to get the actual data. + + """ + return self.db.all(""" + + SELECT pi.id + , c.*::countries AS country + , schema_name + FROM participant_identities pi + JOIN countries c ON pi.country_id=c.id + WHERE participant_id=%s + ORDER BY c.code + + """, (self.id,)) + + +# Rekeying +# ======== + +def rekey(db, packer): + """Rekey the encrypted participant identity information in our database. + + :param GratipayDB db: used to access the database + :param EncryptingPacker packer: used to decrypt and encrypt data + + This function features prominently in our procedure for rekeying our + encrypted data, as documented in the "`Keep Secrets`_" howto. It operates + by loading records from `participant_identities` that haven't been updated + in the present month, in batches of 100. 
It updates a timestamp atomically + with each rekeyed `info`, so it can be safely rerun in the face of network + failure, etc. + + .. _Keep Secrets: http://inside.gratipay.com/howto/keep-secrets + + """ + n = 0 + while 1: + m = _rekey_one_batch(db, packer) + if m == 0: + break + n += m + return n + + +def _rekey_one_batch(db, packer): + batch = db.all(""" + + SELECT id, info + FROM participant_identities + WHERE _info_last_keyed < date_trunc('month', now()) + ORDER BY _info_last_keyed ASC + LIMIT 100 + + """) + if not batch: + return 0 + + for rec in batch: + plaintext = packer.unpack(bytes(rec.info)) + new_token = packer.pack(plaintext) + db.run( "UPDATE participant_identities SET info=%s, _info_last_keyed=now() WHERE id=%s" + , (new_token, rec.id) + ) + + return len(batch) diff --git a/gratipay/wireup.py b/gratipay/wireup.py index 6b41d89334..8aa1dd220e 100644 --- a/gratipay/wireup.py +++ b/gratipay/wireup.py @@ -34,6 +34,7 @@ from gratipay.models.country import Country from gratipay.models.exchange_route import ExchangeRoute from gratipay.models.participant import Participant +from gratipay.models.participant.mixins import Identity from gratipay.models.team import Team from gratipay.models import GratipayDB from gratipay.security.crypto import EncryptingPacker @@ -63,7 +64,8 @@ def db(env): def crypto(env): keys = [k.encode('ASCII') for k in env.crypto_keys.split()] - Participant.encrypting_packer = EncryptingPacker(*keys) + out = Identity.encrypting_packer = EncryptingPacker(*keys) + return out def mail(env, project_root='.'): if env.aws_ses_access_key_id and env.aws_ses_secret_access_key and env.aws_ses_default_region: diff --git a/sql/branch.sql b/sql/branch.sql new file mode 100644 index 0000000000..d82835a355 --- /dev/null +++ b/sql/branch.sql @@ -0,0 +1,28 @@ +CREATE TABLE participant_identities +( id bigserial primary key +, participant_id bigint NOT NULL REFERENCES participants(id) +, country_id bigint NOT NULL REFERENCES countries(id) +, schema_name 
text NOT NULL +, info bytea NOT NULL +, _info_last_keyed timestamptz NOT NULL DEFAULT now() +, UNIQUE(participant_id, country_id) + ); + + +-- fail_if_no_email + +CREATE FUNCTION fail_if_no_email() RETURNS trigger AS $$ + BEGIN + IF (SELECT email_address FROM participants WHERE id=NEW.participant_id) IS NULL THEN + RAISE EXCEPTION + USING ERRCODE=23100 + , MESSAGE='This operation requires a verified participant email address.'; + END IF; + RETURN NEW; + END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER enforce_email_for_participant_identity + BEFORE INSERT ON participant_identities + FOR EACH ROW + EXECUTE PROCEDURE fail_if_no_email(); diff --git a/tests/py/test_close.py b/tests/py/test_close.py index c15acb9cc9..88ca8a59cc 100644 --- a/tests/py/test_close.py +++ b/tests/py/test_close.py @@ -174,6 +174,19 @@ def test_cpi_clears_communities(self): assert Community.from_slug('test').nmembers == 1 + def test_cpi_clears_personal_identities(self): + alice = self.make_participant('alice', email_address='alice@example.com') + US = self.db.one("SELECT id FROM countries WHERE code='US'") + alice.store_identity_info(US, 'nothing-enforced', {'name': 'Alice'}) + assert len(alice.list_identity_metadata()) == 1 + assert self.db.one('SELECT count(*) FROM participant_identities;') == 1 + + with self.db.get_cursor() as cursor: + alice.clear_personal_information(cursor) + + assert len(alice.list_identity_metadata()) == 0 + assert self.db.one('SELECT count(*) FROM participant_identities;') == 0 + # uic = update_is_closed diff --git a/tests/py/test_participant.py b/tests/py/test_participant.py index f944a35e82..d3d5552d9b 100644 --- a/tests/py/test_participant.py +++ b/tests/py/test_participant.py @@ -22,7 +22,8 @@ from gratipay.models.account_elsewhere import AccountElsewhere from gratipay.models.exchange_route import ExchangeRoute from gratipay.models.participant import ( - LastElsewhere, NeedConfirmation, NonexistingElsewhere, Participant, TeamCantBeOnlyAuth + LastElsewhere, 
NeedConfirmation, NonexistingElsewhere, Participant, TeamCantBeOnlyAuth, + WontTakeOverWithIdentities ) from gratipay.models.team import Team from gratipay.testing import Harness @@ -192,6 +193,31 @@ def test_take_over_fails_if_it_would_result_in_just_a_team_account(self): , have_confirmation=True ) + def test_take_over_is_fine_with_identity_info_on_primary(self): + TT = self.db.one("SELECT id FROM countries WHERE code='TT'") + alice = self.make_participant('alice') + alice.add_email('alice@example.com') + alice.verify_email('alice@example.com', alice.get_email('alice@example.com').nonce) + alice.store_identity_info(TT, 'nothing-enforced', {}) + + bob_github = self.make_elsewhere('github', 2, 'bob') + bob_github.opt_in('bob') + + alice.take_over(bob_github, have_confirmation=True) + self.db.self_check() + + def test_take_over_fails_if_secondary_has_identity_info(self): + TT = self.db.one("SELECT id FROM countries WHERE code='TT'") + alice = self.make_participant('alice') + + bob_github = self.make_elsewhere('github', 2, 'bob') + bob = bob_github.opt_in('bob')[0].participant + bob.add_email('bob@example.com') + bob.verify_email('bob@example.com', bob.get_email('bob@example.com').nonce) + bob.store_identity_info(TT, 'nothing-enforced', {}) + + pytest.raises(WontTakeOverWithIdentities, alice.take_over, bob_github) + def test_idempotent(self): alice_twitter = self.make_elsewhere('twitter', 1, 'alice') bob_github = self.make_elsewhere('github', 2, 'bob') diff --git a/tests/py/test_participant_identities.py b/tests/py/test_participant_identities.py new file mode 100644 index 0000000000..2e525df86b --- /dev/null +++ b/tests/py/test_participant_identities.py @@ -0,0 +1,179 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +from cryptography.fernet import InvalidToken +from gratipay.testing import Harness +from gratipay.models.participant.mixins import identity, Identity +from gratipay.models.participant.mixins.identity import 
_validate_info, rekey +from gratipay.models.participant.mixins.identity import ParticipantIdentityInfoInvalid +from gratipay.models.participant.mixins.identity import ParticipantIdentitySchemaUnknown +from gratipay.security.crypto import EncryptingPacker, Fernet +from psycopg2 import IntegrityError +from pytest import raises + + +class Tests(Harness): + + @classmethod + def setUpClass(cls): + Harness.setUpClass() + cls.TT = cls.db.one("SELECT id FROM countries WHERE code='TT'") + cls.US = cls.db.one("SELECT id FROM countries WHERE code='US'") + + def _failer(info): + raise ParticipantIdentityInfoInvalid('You failed.') + identity.schema_validators['impossible'] = _failer + + @classmethod + def tearDownClass(cls): + del identity.schema_validators['impossible'] + + def setUp(self): + self.crusher = self.make_participant('crusher', email_address='foo@example.com') + + def assert_events(self, crusher_id, identity_ids, country_ids, actions): + events = self.db.all("SELECT * FROM events ORDER BY ts ASC") + nevents = len(events) + + assert [e.type for e in events] == ['participant'] * nevents + assert [e.payload['id'] for e in events] == [crusher_id] * nevents + assert [e.payload['identity_id'] for e in events] == identity_ids + assert [e.payload['country_id'] for e in events] == country_ids + assert [e.payload['action'] for e in events] == actions + + + # rii - retrieve_identity_info + + def test_rii_retrieves_identity_info(self): + self.crusher.store_identity_info(self.US, 'nothing-enforced', {'name': 'Crusher'}) + assert self.crusher.retrieve_identity_info(self.US)['name'] == 'Crusher' + + def test_rii_retrieves_identity_when_there_are_multiple_identities(self): + self.crusher.store_identity_info(self.US, 'nothing-enforced', {'name': 'Crusher'}) + self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Bruiser'}) + assert self.crusher.retrieve_identity_info(self.US)['name'] == 'Crusher' + assert self.crusher.retrieve_identity_info(self.TT)['name'] == 
'Bruiser' + + def test_rii_returns_None_if_there_is_no_identity_info(self): + assert self.crusher.retrieve_identity_info(self.US) is None + + def test_rii_logs_event(self): + iid = self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Crusher'}) + self.crusher.retrieve_identity_info(self.TT) + self.assert_events( self.crusher.id + , [iid, iid] + , [self.TT, self.TT] + , ['insert identity', 'retrieve identity'] + ) + + def test_rii_still_logs_an_event_when_noop(self): + self.crusher.retrieve_identity_info(self.TT) + self.assert_events( self.crusher.id + , [None] + , [self.TT] + , ['retrieve identity'] + ) + + + # lim - list_identity_metadata + + def test_lim_lists_identity_metadata(self): + self.crusher.store_identity_info(self.US, 'nothing-enforced', {'name': 'Crusher'}) + assert [x.country.code for x in self.crusher.list_identity_metadata()] == ['US'] + + def test_lim_lists_metadata_for_multiple_identities(self): + for country in (self.US, self.TT): + self.crusher.store_identity_info(country, 'nothing-enforced', {'name': 'Crusher'}) + assert [x.country.code for x in self.crusher.list_identity_metadata()] == ['TT', 'US'] + + + # sii - store_identity_info + + def test_sii_sets_identity_info(self): + self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Crusher'}) + assert [x.country.code for x in self.crusher.list_identity_metadata()] == ['TT'] + + def test_sii_sets_a_second_identity(self): + self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Crusher'}) + self.crusher.store_identity_info(self.US, 'nothing-enforced', {'name': 'Crusher'}) + assert [x.country.code for x in self.crusher.list_identity_metadata()] == ['TT', 'US'] + + def test_sii_overwrites_first_identity(self): + self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Crusher'}) + self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Bruiser'}) + assert [x.country.code for x in 
self.crusher.list_identity_metadata()] == ['TT'] + assert self.crusher.retrieve_identity_info(self.TT)['name'] == 'Bruiser' + + def test_sii_validates_identity(self): + raises( ParticipantIdentityInfoInvalid + , self.crusher.store_identity_info + , self.TT + , 'impossible' + , {'foo': 'bar'} + ) + + def test_sii_happily_overwrites_schema_name(self): + packed = Identity.encrypting_packer.pack({'name': 'Crusher'}) + self.db.run( "INSERT INTO participant_identities " + "(participant_id, country_id, schema_name, info) " + "VALUES (%s, %s, %s, %s)" + , (self.crusher.id, self.TT, 'flah', packed) + ) + assert [x.schema_name for x in self.crusher.list_identity_metadata()] == ['flah'] + self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Crusher'}) + assert [x.schema_name for x in self.crusher.list_identity_metadata()] == \ + ['nothing-enforced'] + + def test_sii_logs_event(self): + iid = self.crusher.store_identity_info(self.TT, 'nothing-enforced', {'name': 'Crusher'}) + self.assert_events(self.crusher.id, [iid], [self.TT], ['insert identity']) + + + # _vi - _validate_info + + def test__vi_validates_info(self): + err = raises(ParticipantIdentityInfoInvalid, _validate_info, 'impossible', {'foo': 'bar'}) + assert err.value.message == 'You failed.' 
+ + def test__vi_chokes_on_unknown_schema(self): + err = raises(ParticipantIdentitySchemaUnknown, _validate_info, 'floo-floo', {'foo': 'bar'}) + assert err.value.message == "unknown schema 'floo-floo'" + + + # fine - fail_if_no_email + + def test_fine_fails_if_no_email(self): + bruiser = self.make_participant('bruiser') + error = raises( IntegrityError + , bruiser.store_identity_info + , self.US + , 'nothing-enforced' + , {'name': 'Bruiser'} + ).value + assert error.pgcode == '23100' + assert bruiser.list_identity_metadata() == [] + + + # rekey + + def rekey_setup(self): + self.crusher.store_identity_info(self.US, 'nothing-enforced', {'name': 'Crusher'}) + self.db.run("UPDATE participant_identities " + "SET _info_last_keyed=_info_last_keyed - '6 months'::interval") + old_key = str(self.client.website.env.crypto_keys) + return EncryptingPacker(Fernet.generate_key(), old_key) + + def test_rekey_rekeys(self): + assert rekey(self.db, self.rekey_setup()) == 1 + + def test_rekeying_causes_old_packer_to_fail(self): + rekey(self.db, self.rekey_setup()) + raises(InvalidToken, self.crusher.retrieve_identity_info, self.US) + + def test_rekeyed_data_is_accessible_with_new_key(self): + self.crusher.encrypting_packer = self.rekey_setup() + assert self.crusher.retrieve_identity_info(self.US) == {'name': 'Crusher'} + + def test_rekey_ignores_recently_keyed_records(self): + self.crusher.encrypting_packer = self.rekey_setup() + assert rekey(self.db, self.crusher.encrypting_packer) == 1 + assert rekey(self.db, self.crusher.encrypting_packer) == 0 diff --git a/tests/py/test_security.py b/tests/py/test_security.py index c02cf669cd..fdfb5024ae 100644 --- a/tests/py/test_security.py +++ b/tests/py/test_security.py @@ -8,7 +8,7 @@ from base64 import urlsafe_b64decode from cryptography.fernet import Fernet, InvalidToken from gratipay import security -from gratipay.models.participant import Participant +from gratipay.models.participant.mixins import Identity from gratipay.security.crypto 
import EncryptingPacker from gratipay.testing import Harness from pytest import raises @@ -57,11 +57,11 @@ def test_ahtr_sets_x_xss_protection(self): b'5TdyoJsll5nMAicg==' def test_ep_packs_encryptingly(self): - packed = Participant.encrypting_packer.pack({"foo": "bar"}) + packed = Identity.encrypting_packer.pack({"foo": "bar"}) assert urlsafe_b64decode(packed)[0] == b'\x80' # Fernet version def test_ep_unpacks_decryptingly(self): - assert Participant.encrypting_packer.unpack(self.packed) == {"foo": "bar"} + assert Identity.encrypting_packer.unpack(self.packed) == {"foo": "bar"} def test_ep_fails_to_unpack_old_data_with_a_new_key(self): encrypting_packer = EncryptingPacker(Fernet.generate_key()) @@ -78,5 +78,5 @@ def test_ep_leaks_timestamp_derp(self): assert datetime.datetime.fromtimestamp(timestamp).year == 2016 def test_ep_demands_bytes(self): - raises(TypeError, Participant.encrypting_packer.unpack, buffer('buffer')) - raises(TypeError, Participant.encrypting_packer.unpack, 'unicode') + raises(TypeError, Identity.encrypting_packer.unpack, buffer('buffer')) + raises(TypeError, Identity.encrypting_packer.unpack, 'unicode')