diff --git a/.travis.yml b/.travis.yml index b846203f7e..ad4c559667 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ before_install: # Sometimes ya just halfta ... - git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure && sudo make install && cd .. - - npm install -g marky-markdown + - npm install marky-markdown cache: directories: - env/bin @@ -23,6 +23,7 @@ install: - env/bin/pip install --upgrade ijson==2.3.0 before_script: - echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env + - echo "REQUIRE_YAJL=true" | tee -a tests/local.env local.env - psql -U postgres -c 'CREATE DATABASE "gratipay";' - if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi script: LD_LIBRARY_PATH=/usr/local/lib make bgrun test doc diff --git a/Makefile b/Makefile index eef1268f7f..fb634be968 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ pyflakes: env $(env_bin)/pyflakes *.py bin gratipay tests test: test-schema - $(py_test) --cov gratipay ./tests/ + $(py_test) -vv --cov gratipay ./tests/ @$(MAKE) --no-print-directory pyflakes pytest: env diff --git a/bin/our-marky-markdown.js b/bin/our-marky-markdown.js new file mode 100755 index 0000000000..1035afae8c --- /dev/null +++ b/bin/our-marky-markdown.js @@ -0,0 +1,21 @@ +#!/usr/bin/env node +// Extend marky-markdown.js to support the package argument: +// https://www.npmjs.com/package/marky-markdown#npm-packages + +var fs = require('fs') +var path = require('path') +var marky = require('marky-markdown') + +if (process.argv.length < 3) { + console.log('Usage:\n\nour-marky-markdown some.md pkg > some.html') + process.exit() +} + +var filePath = path.resolve(process.cwd(), process.argv[2]) + +fs.readFile(filePath, function (err, data) { + if (err) throw err + var package = process.argv[3] ? JSON.parse(process.argv[3]) : null; + var html = marky(data.toString(), {package: package}) + process.stdout.write(html) +}) diff --git a/defaults.env b/defaults.env index cd08ea16fa..e9eb67199f 100644 --- a/defaults.env +++ b/defaults.env @@ -87,6 +87,8 @@ ASPEN_PROJECT_ROOT=. ASPEN_SHOW_TRACEBACKS=yes ASPEN_WWW_ROOT=www/ +# This is used in our Procfile. (PORT is also used but is provided by +# Heroku; we don't set it ourselves in our app config.) # https://github.com/benoitc/gunicorn/issues/186 GUNICORN_OPTS="--workers=1 --timeout=99999999" @@ -96,5 +98,10 @@ TEAM_REVIEW_REPO=gratipay/test-gremlin TEAM_REVIEW_USERNAME= TEAM_REVIEW_TOKEN= +# anything Postgres can interpret as an interval RESEND_VERIFICATION_THRESHOLD="3 minutes" + RAISE_SIGNIN_NOTIFICATIONS=no + +# speeds up npm syncing; should be true on production and Travis +REQUIRE_YAJL=false diff --git a/gratipay/package_managers/readmes.py b/gratipay/package_managers/readmes.py index 17b43da52a..dc84ca649c 100644 --- a/gratipay/package_managers/readmes.py +++ b/gratipay/package_managers/readmes.py @@ -24,8 +24,8 @@ def http_fetch(package_name): return r.json() -def Syncer(db): - def sync(dirty, fetch=http_fetch): +def Fetcher(db): + def fetch(dirty, fetch=http_fetch): """Update all info for one package. """ log(dirty.name) @@ -43,23 +43,58 @@ def sync(dirty, fetch=http_fetch): db.run(''' UPDATE packages - SET readme=%s + SET readme_needs_to_be_processed=true , readme_raw=%s , readme_type=%s WHERE package_manager=%s AND name=%s - ''', ( markdown.marky(full['readme']) - , full['readme'] - , 'x-markdown/npm' + ''', ( full['readme'] + , 'x-markdown/marky' + , dirty.package_manager + , dirty.name + )) + + return fetch + + +def Processor(db): + def process(dirty): + """Processes the readme for a single page. + """ + log(dirty.name) + raw = db.one( 'SELECT readme_raw FROM packages ' + 'WHERE package_manager=%s and name=%s and readme_needs_to_be_processed' + , (dirty.package_manager, dirty.name) + ) + if raw is None: + return + processed = markdown.render_like_npm(raw) + db.run(''' + + UPDATE packages + SET readme=%s + , readme_needs_to_be_processed=false + WHERE package_manager=%s + AND name=%s + + ''', ( processed , dirty.package_manager , dirty.name )) - return sync + return process + + +def fetch(db): + dirty = db.all('SELECT package_manager, name ' + 'FROM packages WHERE readme_raw IS NULL ' + 'ORDER BY package_manager DESC, name DESC') + threaded_map(Fetcher(db), dirty, 4) -def sync_all(db): - dirty = db.all('SELECT package_manager, name FROM packages WHERE readme_raw IS NULL ' +def process(db): + dirty = db.all('SELECT id, package_manager, name, description, readme_raw ' + 'FROM packages WHERE readme_needs_to_be_processed' 'ORDER BY package_manager DESC, name DESC') - threaded_map(Syncer(db), dirty, 4) + threaded_map(Processor(db), dirty, 4) diff --git a/gratipay/package_managers/sync.py b/gratipay/package_managers/sync.py index f6a5a093b3..78ea9532d9 100644 --- a/gratipay/package_managers/sync.py +++ b/gratipay/package_managers/sync.py @@ -8,6 +8,7 @@ import time import uuid +from gratipay import wireup from gratipay.package_managers import readmes as _readmes @@ -15,8 +16,13 @@ NULL = uuid.uuid4().hex -def import_ijson(): - import ijson.backends.yajl2_cffi as ijson +# helpers + +def import_ijson(env): + if env.require_yajl: + import ijson.backends.yajl2_cffi as ijson + else: + import ijson return ijson @@ -50,10 +56,12 @@ def serialize_one(out, package): return 1 -def serialize(args): +# cli subcommands + +def serialize(env, args, _): """Consume raw JSON from the npm registry and spit out CSV for Postgres. """ - ijson = import_ijson() + ijson = import_ijson(env) path = args.path parser = ijson.parse(open(path)) @@ -97,9 +105,9 @@ def log_stats(): log_stats() -def upsert(args): - from gratipay import wireup - db = wireup.db(wireup.env()) +def upsert(env, args, db): + """Take a CSV file from stdin and load it into Postgres. + """ fp = open(args.path) with db.get_cursor() as cursor: assert cursor.connection.encoding == 'UTF8' @@ -128,19 +136,25 @@ def upsert(args): """) -def readmes(args): - from gratipay import wireup - db = wireup.db(wireup.env()) - _readmes.sync_all(db) +def fetch_readmes(env, args, db): + _readmes.fetch(db) + + +def process_readmes(env, args, db): + _readmes.process(db) + +# cli plumbing def parse_args(argv): p = argparse.ArgumentParser() - p.add_argument('command', choices=['serialize', 'upsert', 'readmes']) + p.add_argument('command', choices=['serialize', 'upsert', 'fetch-readmes', 'process-readmes']) p.add_argument('path', help='the path to the input file', nargs='?', default='/dev/stdin') return p.parse_args(argv) def main(argv=sys.argv): + env = wireup.env() args = parse_args(argv[1:]) - globals()[args.command](args) + db = wireup.db(env) + globals()[args.command.replace('-', '_')](env, args, db) diff --git a/gratipay/testing/__init__.py b/gratipay/testing/__init__.py index 349b302b64..190d55cb24 100644 --- a/gratipay/testing/__init__.py +++ b/gratipay/testing/__init__.py @@ -3,8 +3,13 @@ from __future__ import absolute_import, division, print_function, unicode_literals from decimal import Decimal -from gratipay.models.participant import Participant -from gratipay.models.team import Team + +import pytest +from aspen import log_dammit + +from ..models.participant import Participant +from ..models.team import Team +from ..utils import markdown D = Decimal #: P = Participant.from_username #: @@ -45,3 +50,17 @@ def debug_http(): requests_log = logging.getLogger("requests.packages.urllib3") requests_log.setLevel(logging.DEBUG) requests_log.propagate = True + + +# Provide a decorator to skip tests when marky-markdown is missing. + +try: + markdown.render_like_npm('test') +except OSError as exc: + MISSING_MARKY_MARKDOWN = True + log_dammit('Will skip marky-markdown-related tests because:', exc.args[0]) +else: + MISSING_MARKY_MARKDOWN = False + +def skipif_missing_marky_markdown(func): + return pytest.mark.skipif(MISSING_MARKY_MARKDOWN, reason="missing marky-markdown")(func) diff --git a/gratipay/utils/markdown.py b/gratipay/utils/markdown.py index 3c581854c1..8120824e93 100644 --- a/gratipay/utils/markdown.py +++ b/gratipay/utils/markdown.py @@ -1,10 +1,19 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + from subprocess import Popen, PIPE -from markupsafe import Markup +import json import misaka as m # http://misaka.61924.nl/ +from markupsafe import Markup def render(markdown): + """Process markdown approximately the same way that GitHub used to. + + (Note that as of November, 2016 they are migrating to CommonMark, so we are + starting to drift.) + + """ return Markup(m.html( markdown, extensions=m.EXT_AUTOLINK | m.EXT_STRIKETHROUGH | m.EXT_NO_INTRA_EMPHASIS, @@ -12,10 +21,26 @@ def render(markdown): )) -def marky(markdown): +def render_like_npm(markdown, package=None): """Process markdown the same way npm does. + + Package should be a dict representing the package. If it includes `name` + and `description` then the first h1 and paragraph will have a + 'package-{name,description}-redundant' class added to them if they're + similar enough. If it includes `repository.url` then links will be changed + somehow. For details consult the docs and code: + + https://github.com/npm/marky-markdown + """ if type(markdown) is unicode: markdown = markdown.encode('utf8') - marky = Popen(("marky-markdown", "/dev/stdin"), stdin=PIPE, stdout=PIPE) - return Markup(marky.communicate(markdown)[0]) + cmd = ("bin/our-marky-markdown.js", "/dev/stdin") + if package: + cmd += (json.dumps(package),) + marky = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) + out, err = marky.communicate(markdown) + if marky.wait() > 0: + raise OSError(err) + return out + diff --git a/gratipay/wireup.py b/gratipay/wireup.py index 8aa1dd220e..9967e5da05 100644 --- a/gratipay/wireup.py +++ b/gratipay/wireup.py @@ -432,10 +432,8 @@ def env(): TEAM_REVIEW_USERNAME = unicode, TEAM_REVIEW_TOKEN = unicode, RAISE_SIGNIN_NOTIFICATIONS = is_yesish, - RESEND_VERIFICATION_THRESHOLD = unicode, # anything Postgres can interpret as an interval - - # This is used in our Procfile. (PORT is also used but is provided by - # Heroku; we don't set it ourselves in our app config.) + RESEND_VERIFICATION_THRESHOLD = unicode, + REQUIRE_YAJL = is_yesish, GUNICORN_OPTS = unicode, ) diff --git a/requirements.txt b/requirements.txt index 0e67de97d9..23b5640468 100644 --- a/requirements.txt +++ b/requirements.txt @@ -63,4 +63,6 @@ ./vendor/ipaddress-1.0.16.tar.gz ./vendor/cryptography-1.3.2.tar.gz +./vendor/ijson-2.3.tar.gz + -e . diff --git a/sql/branch.sql b/sql/branch.sql new file mode 100644 index 0000000000..a9e05ce01b --- /dev/null +++ b/sql/branch.sql @@ -0,0 +1 @@ +ALTER TABLE packages ADD COLUMN readme_needs_to_be_processed bool NOT NULL DEFAULT true; diff --git a/tests/py/test_markdown.py b/tests/py/test_markdown.py index 7b140148cb..1c87fd933c 100644 --- a/tests/py/test_markdown.py +++ b/tests/py/test_markdown.py @@ -1,12 +1,77 @@ -from gratipay.testing import Harness +from __future__ import absolute_import, division, print_function, unicode_literals + +from gratipay.testing import Harness, skipif_missing_marky_markdown from gratipay.utils import markdown from HTMLParser import HTMLParser + class TestMarkdown(Harness): - def test_marky_works(self): + # render + + def test_render_renders(self): + expected = "
Example
\n" + actual = markdown.render('Example') + assert expected == actual + + def test_render_escapes_scripts(self): + expected = 'Example alert “hi”;
\n' + actual = markdown.render('Example ') + assert expected == actual + + def test_render_renders_http_links(self): + expected = '\n' + assert markdown.render('[foo](http://example.com/)') == expected + expected = '\n' + assert markdown.render('[foo](javascript:foo)
\n' + assert markdown.render('[foo](javascript:foo)') == expected + expected = '<javascript:foo>
\n' + assert markdown.render('foo
\n' + assert markdown.render('foo') == expected + expected = 'foo
\n' + assert markdown.render('foo') == expected + expected = 'foo
\n' + assert markdown.render('foo') == expected + + def test_render_autolinks(self): + expected = '\n' + actual = markdown.render('http://google.com/') + assert expected == actual + + def test_render_no_intra_emphasis(self): + expected = 'Examples like this_one and this other_one.
\n' + actual = markdown.render('Examples like this_one and this other_one.') + assert expected == actual + + + # rln - render_like_npm + + @skipif_missing_marky_markdown + def test_rln_works(self): md = "**Hello World!**" - actual = HTMLParser().unescape(markdown.marky(md)).strip() + actual = HTMLParser().unescape(markdown.render_like_npm(md)).strip() expected = 'Hello World!
' assert actual == expected + + @skipif_missing_marky_markdown + def test_rln_handles_npm_package(self): + md = "# Greetings, program!\nGreetings. Program." + pkg = {'name': 'greetings-program', 'description': 'Greetings, program.'} + actual = HTMLParser().unescape(markdown.render_like_npm(md, pkg)).strip() + expected = '''\ +Greetings. Program.
''' + assert actual == expected diff --git a/tests/py/test_npm_sync.py b/tests/py/test_npm_sync.py index 4e0e4579bf..ac5b40db48 100644 --- a/tests/py/test_npm_sync.py +++ b/tests/py/test_npm_sync.py @@ -1,13 +1,11 @@ -"""Tests for syncing npm. Requires a `pip install ijson`, which requires yajl. Good luck! :^) +"""Tests for syncing npm. """ from __future__ import absolute_import, division, print_function, unicode_literals from subprocess import Popen, PIPE -import pytest -from gratipay.utils import markdown -from gratipay.testing import Harness -from gratipay.package_managers import readmes, sync +from gratipay.testing import Harness, skipif_missing_marky_markdown +from gratipay.package_managers import readmes def load(raw): @@ -19,22 +17,6 @@ def load(raw): ).communicate(serialized)[0] -try: - sync.import_ijson() -except ImportError: - missing_ijson = True -else: - missing_ijson = False - -try: - markdown.marky('test') -except OSError: - missing_marky_markdown = True -else: - missing_marky_markdown = False - -@pytest.mark.skipif(missing_ijson, reason="missing ijson") -@pytest.mark.skipif(missing_marky_markdown, reason="missing marky-markdown") class Tests(Harness): def test_packages_starts_empty(self): @@ -101,9 +83,9 @@ def test_sn_handles_empty_description_and_emails(self): assert package.emails == [] - # rs - readmes.Syncer + # rf - readmes.Fetcher - def test_rs_syncs_a_readme(self): + def test_rf_fetches_a_readme(self): self.db.run("INSERT INTO packages (package_manager, name, description, emails) " "VALUES ('npm', 'foo-package', 'A package', ARRAY[]::text[])") @@ -114,12 +96,42 @@ class DirtyPackage: def fetch(name): return {'name': 'foo-package', 'readme': '# Greetings, program!'} - readmes.Syncer(self.db)(DirtyPackage(), fetch=fetch) + readmes.Fetcher(self.db)(DirtyPackage(), fetch=fetch) + + package = self.db.one('SELECT * FROM packages') + assert package.name == 'foo-package' + assert package.description == 'A package' + assert package.readme == '' + assert package.readme_needs_to_be_processed + assert package.readme_raw == '# Greetings, program!' + assert package.readme_type == 'x-markdown/marky' + assert package.emails == [] + + + # rp - readmes.Processor + + @skipif_missing_marky_markdown + def test_rp_processes_a_readme(self): + self.db.run(''' + + INSERT + INTO packages (package_manager, name, description, readme_raw, readme_type, emails) + VALUES ('npm', 'foo-package', 'A package', '# Greetings, program!', 'x-markdown/marky', + ARRAY[]::text[]) + + ''') + + class DirtyPackage: + package_manager = 'npm' + name = 'foo-package' + + readmes.Processor(self.db)(DirtyPackage()) package = self.db.one('SELECT * FROM packages') assert package.name == 'foo-package' assert package.description == 'A package' assert package.readme == 'Example
\n" - actual = markdown.render('Example') - assert expected == actual - - def test_markdown_render_escapes_scripts(self): - expected = 'Example alert “hi”;
\n' - actual = markdown.render('Example ') - assert expected == actual - - def test_markdown_render_renders_http_links(self): - expected = '\n' - assert markdown.render('[foo](http://example.com/)') == expected - expected = '\n' - assert markdown.render('[foo](javascript:foo)
\n' - assert markdown.render('[foo](javascript:foo)') == expected - expected = '<javascript:foo>
\n' - assert markdown.render('foo
\n' - assert markdown.render('foo') == expected - expected = 'foo
\n' - assert markdown.render('foo') == expected - expected = 'foo
\n' - assert markdown.render('foo') == expected - - def test_markdown_render_autolinks(self): - expected = '\n' - actual = markdown.render('http://google.com/') - assert expected == actual - - def test_markdown_render_no_intra_emphasis(self): - expected = 'Examples like this_one and this other_one.
\n' - actual = markdown.render('Examples like this_one and this other_one.') - assert expected == actual - def test_srau_retries_work_with_db(self): self.make_participant('deadbeef') def gen_test_username(): diff --git a/vendor/ijson-2.3.tar.gz b/vendor/ijson-2.3.tar.gz new file mode 100644 index 0000000000..e5dd11238d Binary files /dev/null and b/vendor/ijson-2.3.tar.gz differ