This repository has been archived by the owner on Feb 8, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 308
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into new-project
- Loading branch information
Showing
29 changed files
with
618 additions
and
366 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/bin/sh
# This is a script to run under the Heroku Scheduler add-on to periodically
# sync our database with the npm registry.

# Abort on the first failing command.
set -e

# Run from the repository root (the parent of this script's directory). The
# quoting keeps this working when the path contains spaces.
cd "$(dirname "$0")/.."


# Install dependencies.
# =====================

# cmake - presumably required by the yajl build below (TODO confirm).
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page; the checksum below then verifies the download itself.
curl --fail https://cmake.org/files/v3.6/cmake-3.6.2-Linux-x86_64.tar.gz > cmake.tgz
echo '5df4b69d9e85093ae78b1070d5cb9f824ce0bdd02528948c3f6a740e240083e5  cmake.tgz' \
    | sha256sum -c /dev/stdin --status
tar zxf cmake.tgz
PATH=/app/cmake-3.6.2-Linux-x86_64/bin:$PATH

# yajl
git clone https://github.com/lloyd/yajl.git
cd yajl
git checkout 2.1.0
./configure -p /app/.heroku/python
make install
cd ..

# python
pip install ijson==2.3.0
pip install -e .


# Sync with npm.
# ==============

# NOTE: under plain sh only the exit status of the last command in a pipeline
# is seen by `set -e`, so a failure in curl or `serialize` is not fatal here.
curl --fail https://registry.npmjs.com/-/all | sync-npm serialize | sync-npm upsert
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/usr/bin/env python | ||
"""verify-identity.py <participant_id>, <country_code> | ||
""" | ||
from __future__ import absolute_import, division, print_function, unicode_literals | ||
|
||
import sys | ||
|
||
from gratipay import wireup | ||
from gratipay.models.participant import Participant | ||
from gratipay.models.country import Country | ||
|
||
wireup.db(wireup.env()) | ||
|
||
participant = Participant.from_id(int(sys.argv[1])) | ||
country = Country.from_code(sys.argv[2]) | ||
participant.set_identity_verification(country.id, True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from __future__ import absolute_import, division, print_function, unicode_literals | ||
|
||
import sys | ||
|
||
import requests | ||
|
||
from gratipay.utils import markdown | ||
from gratipay.utils.threaded_map import threaded_map | ||
from threading import Lock | ||
|
||
|
||
# Serializes writes to stderr so lines from concurrent threads don't interleave.
log_lock = Lock()


def log(*a, **kw):
    """Print to stderr while holding ``log_lock``.

    Positional and keyword arguments are passed through to ``print``.
    """
    log_lock.acquire()
    try:
        print(*a, file=sys.stderr, **kw)
    finally:
        log_lock.release()
|
||
|
||
def http_fetch(package_name, timeout=30):
    """Fetch the registry document for one package from the npm registry.

    :param package_name: name of the package, appended to the registry URL
    :param timeout: seconds to wait on the registry before giving up; without
        one a stalled connection would block the calling thread indefinitely
    :returns: the decoded JSON body, or ``None`` (after logging the status
        code) when the response status is not 200
    :raises: whatever ``requests`` raises for connection errors and timeouts
    """
    r = requests.get('https://registry.npmjs.com/' + package_name, timeout=timeout)
    if r.status_code != 200:
        log(r.status_code, 'for', package_name)
        return None
    return r.json()
|
||
|
||
def Syncer(db):
    """Return a ``sync(dirty, fetch=http_fetch)`` function bound to ``db``.
    """
    update_readme = '''
        UPDATE packages
           SET readme=%s
             , readme_raw=%s
             , readme_type=%s
         WHERE package_manager=%s
           AND name=%s
    '''

    def sync(dirty, fetch=http_fetch):
        """Update all info for one package.

        ``dirty`` supplies ``name`` and ``package_manager``; ``fetch`` maps a
        package name to its registry document (or a falsy value on failure).
        Nothing is written if the fetch fails, the returned name does not
        match, or the document has no readme.
        """
        log(dirty.name)
        full = fetch(dirty.name)

        # Guard clauses: bail out (with a log line where useful) on bad data.
        if not full:
            return
        if full['name'] != dirty.name:
            log('expected', dirty.name, 'got', full['name'])
            return
        if 'readme' not in full:
            log('no readme in', full['name'])
            return

        rendered = markdown.marky(full['readme'])
        raw = full['readme']
        params = (rendered, raw, 'x-markdown/npm', dirty.package_manager, dirty.name)
        db.run(update_readme, params)

    return sync
|
||
|
||
def sync_all(db):
    """Sync the readme of every package whose ``readme_raw`` is NULL.

    Packages are selected from ``db`` and handed to a ``Syncer`` on four
    threads.
    """
    query = ('SELECT package_manager, name FROM packages WHERE readme_raw IS NULL '
             'ORDER BY package_manager DESC, name DESC')
    stale = db.all(query)
    sync = Syncer(db)
    threaded_map(sync, stale, 4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
"""Sync our database with package managers. Just npm for now. | ||
""" | ||
from __future__ import absolute_import, division, print_function, unicode_literals | ||
|
||
import argparse | ||
import csv | ||
import sys | ||
import time | ||
import uuid | ||
|
||
from gratipay.package_managers import readmes as _readmes | ||
|
||
|
||
def log(*a):
    """Print the given values to stderr."""
    return print(*a, file=sys.stderr)


# Random token passed to COPY as its NULL marker.
NULL = uuid.uuid4().hex
|
||
|
||
def import_ijson():
    """Import and return ijson's ``yajl2_cffi`` backend module.

    The import happens at call time rather than at module import time.
    """
    from ijson.backends import yajl2_cffi
    return yajl2_cffi
|
||
|
||
def arrayize(seq):
    """Given a sequence of byte strings, return a Postgres array literal.

    Each item is wrapped in double quotes, with backslashes and double quotes
    backslash-escaped, and the items are joined inside curly braces.

    :param seq: iterable of byte strings (``str`` on Python 2)
    :returns: the array literal as a byte string, ``b'{}'`` for an empty input
    :raises TypeError: if an item is not a byte string
    """
    quoted = []
    for item in seq:
        # A real check rather than an assert, which `python -O` would strip.
        if not isinstance(item, bytes):
            raise TypeError('expected a byte string, got %r' % type(item))
        # Escape backslashes first so the ones added for quotes aren't doubled.
        escaped = item.replace(b'\\', b'\\\\').replace(b'"', b'\\"')
        quoted.append(b'"' + escaped + b'"')
    return b'{' + b', '.join(quoted) + b'}'
|
||
|
||
def serialize_one(out, package):
    """Takes a package and emits a serialization suitable for COPY.

    Writes one row (package manager, name, description, emails array) to the
    ``out`` writer. Returns 1 if a row was written, or 0 if the package was
    skipped because it is falsy or its name starts with an underscore.
    """
    should_skip = (not package) or package['name'].startswith('_')
    if should_skip:
        log('skipping', package)
        return 0

    manager = package['package_manager']
    name = package['name']
    description = package['description']
    emails = arrayize(package['emails'])

    out.writerow((manager, name, description, emails))
    return 1
|
||
|
||
def serialize(args):
    """Consume raw JSON from the npm registry and spit out CSV for Postgres.

    Streams the JSON document at ``args.path`` through ijson, treating every
    top-level map key as the start of a new package, and writes one CSV row
    per package to stdout via ``serialize_one``. Progress is logged every 1000
    packages and once more at the end.
    """
    ijson = import_ijson()

    start = time.time()
    package = None
    nprocessed = 0
    out = csv.writer(sys.stdout)

    def log_stats():
        log("processed {} packages in {:3.0f} seconds"
            .format(nprocessed, time.time() - start))

    def key(k):
        # The ijson prefix for field `k` of the package currently being read.
        # `package` is looked up at call time, so this follows the loop below.
        return package['name'] + b'.' + k

    # Use a context manager so the input handle is closed when we're done.
    with open(args.path) as fp:
        for prefix, event, value in ijson.parse(fp):

            if not prefix and event == b'map_key':

                # Flush the current package. We count on the first package being garbage.
                processed = serialize_one(out, package)
                nprocessed += processed
                if processed and not(nprocessed % 1000):
                    log_stats()

                # Start a new package.
                package = { 'package_manager': b'npm'
                          , 'name': value
                          , 'description': b''
                          , 'emails': []
                           }

            if event == b'string':
                assert type(value) is unicode # Who knew? Seems to decode only for `string`.
                value = value.encode('utf8')
                if prefix == key(b'description'):
                    package['description'] = value
                elif prefix in (key(b'author.email'), key(b'maintainers.item.email')):
                    package['emails'].append(value)

    nprocessed += serialize_one(out, package)  # Don't forget the last one!
    log_stats()
|
||
|
||
def upsert(args):
    """Load CSV rows from ``args.path`` and upsert them into ``packages``.

    The rows are COPYed into a temporary ``updates`` table, then existing
    packages are updated and packages not yet present are inserted, all within
    one cursor context.
    """
    from gratipay import wireup
    db = wireup.db(wireup.env())

    # Open the input in a context manager so the handle is always closed.
    with open(args.path) as fp, db.get_cursor() as cursor:
        assert cursor.connection.encoding == 'UTF8'

        # http://tapoueh.org/blog/2013/03/15-batch-update.html
        cursor.run("CREATE TEMP TABLE updates (LIKE packages INCLUDING ALL) ON COMMIT DROP")

        # NULL is a random token, so empty CSV fields are not read as NULL.
        cursor.copy_expert('COPY updates (package_manager, name, description, emails) '
                           "FROM STDIN WITH (FORMAT csv, NULL '%s')" % NULL, fp)

        # NOTE(review): rows are matched on name alone, not on
        # (package_manager, name) - confirm before adding a second manager.
        cursor.run("""

            WITH updated AS (
                UPDATE packages p
                   SET package_manager = u.package_manager
                     , description = u.description
                     , emails = u.emails
                  FROM updates u
                 WHERE p.name = u.name
             RETURNING p.name
            )
            INSERT INTO packages(package_manager, name, description, emails)
                 SELECT package_manager, name, description, emails
                   FROM updates u LEFT JOIN updated USING(name)
                  WHERE updated.name IS NULL
               GROUP BY u.package_manager, u.name, u.description, u.emails

        """)
|
||
|
||
def readmes(args):
    """Sync package readmes using a freshly wired-up database.

    ``args`` is accepted for parity with the other commands but is not used.
    """
    from gratipay import wireup
    env = wireup.env()
    db = wireup.db(env)
    _readmes.sync_all(db)
|
||
|
||
def parse_args(argv):
    """Parse command-line arguments (without the program name).

    Returns a namespace with ``command`` (one of serialize, upsert, readmes)
    and ``path`` (optional, defaulting to ``/dev/stdin``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('command', choices=['serialize', 'upsert', 'readmes'])
    parser.add_argument(
        'path',
        nargs='?',
        default='/dev/stdin',
        help='the path to the input file',
    )
    namespace = parser.parse_args(argv)
    return namespace
|
||
|
||
def main(argv=sys.argv):
    """Entry point: parse ``argv`` and run the chosen command.

    The command name is looked up among this module's globals and the
    resulting function is called with the parsed arguments.
    """
    args = parse_args(argv[1:])
    command = globals()[args.command]
    command(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,21 @@ | ||
from subprocess import Popen, PIPE | ||
|
||
from markupsafe import Markup | ||
import misaka as m # http://misaka.61924.nl/ | ||
|
||
|
||
def render(markdown):
    """Render markdown to HTML with misaka and return it wrapped in ``Markup``.
    """
    extensions = m.EXT_AUTOLINK | m.EXT_STRIKETHROUGH | m.EXT_NO_INTRA_EMPHASIS
    render_flags = m.HTML_SKIP_HTML | m.HTML_TOC | m.HTML_SMARTYPANTS | m.HTML_SAFELINK
    html = m.html(markdown, extensions=extensions, render_flags=render_flags)
    return Markup(html)
|
||
|
||
def marky(markdown):
    """Process markdown the same way npm does.

    Pipes the markdown through the ``marky-markdown`` executable and returns
    its standard output wrapped in ``Markup``.

    :param markdown: the markdown source; text is encoded as UTF-8 before it
        is sent, byte strings are sent as-is
    :returns: ``Markup`` holding the tool's output
    """
    if not isinstance(markdown, bytes):
        markdown = markdown.encode('utf8')
    proc = Popen(("marky-markdown", "/dev/stdin"), stdin=PIPE, stdout=PIPE)
    rendered = proc.communicate(markdown)[0]
    # Decode explicitly: handing raw bytes to Markup falls back to an implicit
    # ASCII decode on Python 2, which blows up on non-ASCII readmes.
    # Assumes marky-markdown emits UTF-8 - TODO confirm.
    return Markup(rendered.decode('utf8'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from __future__ import absolute_import, division, print_function, unicode_literals | ||
|
||
from multiprocessing.dummy import Pool as ThreadPool | ||
|
||
|
||
class ExceptionWrapped(Exception):
    """Carries a worker's exception out of the thread pool.

    ``args[0]`` is the original exception and ``args[1]`` is its formatted
    traceback.
    """


def threaded_map(func, iterable, threads=5):
    """Apply ``func`` to every item of ``iterable`` on a pool of threads.

    :param func: callable applied to each item
    :param iterable: the items to process
    :param threads: number of worker threads
    :returns: list of results, in the order of ``iterable``
    :raises: the original exception raised by ``func``, after printing the
        traceback captured inside the worker
    """
    import traceback

    def g(*a, **kw):
        # Without this wrapper we get a traceback from inside multiprocessing.
        try:
            return func(*a, **kw)
        except Exception as e:
            raise ExceptionWrapped(e, traceback.format_exc())

    pool = ThreadPool(threads)
    try:
        try:
            return pool.map(g, iterable)
        except ExceptionWrapped as e:
            print(e.args[1])
            raise e.args[0]
    finally:
        # Shut the pool down on failure too, so worker threads don't leak.
        pool.close()
        pool.join()
Oops, something went wrong.