From 16ad018962e6dd53a3e73e79514df0c6d169812d Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Tue, 29 Nov 2016 19:27:17 -0500 Subject: [PATCH] Rip out READMEs after all They're too much work to keep up with and keep secure, especially when we want to add 30+ more package managers. --- gratipay/sync_npm/cli.py | 11 ++-- gratipay/sync_npm/fetch_readmes.py | 93 ---------------------------- gratipay/sync_npm/process_readmes.py | 49 --------------- gratipay/testing/__init__.py | 18 ------ gratipay/utils/markdown.py | 27 -------- tests/py/test_markdown.py | 24 +------ tests/py/test_sync_npm.py | 89 +------------------------- 7 files changed, 6 insertions(+), 305 deletions(-) delete mode 100644 gratipay/sync_npm/fetch_readmes.py delete mode 100644 gratipay/sync_npm/process_readmes.py diff --git a/gratipay/sync_npm/cli.py b/gratipay/sync_npm/cli.py index 4e3f062cef..835bb51eea 100644 --- a/gratipay/sync_npm/cli.py +++ b/gratipay/sync_npm/cli.py @@ -7,20 +7,18 @@ import argparse from gratipay import wireup -from gratipay.sync_npm import serialize, upsert, fetch_readmes, process_readmes +from gratipay.sync_npm import serialize, upsert def parse_args(argv): p = argparse.ArgumentParser() - p.add_argument('command', choices=['serialize', 'upsert', 'fetch-readmes', 'process-readmes']) + p.add_argument('command', choices=['serialize', 'upsert']) p.add_argument('path', help='the path to the input file', nargs='?', default='/dev/stdin') return p.parse_args(argv) subcommands = { 'serialize': serialize.main , 'upsert': upsert.main - , 'fetch-readmes': fetch_readmes.main - , 'process-readmes': process_readmes.main } @@ -30,10 +28,9 @@ def main(argv=sys.argv): Usage:: - sync-npm {serialize,upsert,fetch-readmes,process-readmes} {} + sync-npm {serialize,upsert} {} - ```` defaults to stdin. It's necessary for ``serialize`` and - ``upsert``, and silently ignored for ``{fetch,process}-readmes``. + ```` defaults to stdin. .. note:: Sphinx is expanding ``sys.argv`` in the parameter list. 
Sorry. :-/ diff --git a/gratipay/sync_npm/fetch_readmes.py b/gratipay/sync_npm/fetch_readmes.py deleted file mode 100644 index 69973454e3..0000000000 --- a/gratipay/sync_npm/fetch_readmes.py +++ /dev/null @@ -1,93 +0,0 @@ -# -*- coding: utf-8 -*- -"""Subcommand for fetching readmes. -""" -from __future__ import absolute_import, division, print_function, unicode_literals - -import requests - -from . import log -from ..utils.threaded_map import threaded_map - - -def fetch_from_public_registry(package_name): - """Fetch a package from the public npm registry. - """ - try: - r = requests.get('https://registry.npmjs.com/' + package_name) - except requests.ConnectionError: - return (600, None) # will be skipped and retried later - if r.status_code in (200, 404): - out = r.json() - else: - out = None - return r.status_code, out - - -def delete_package(db, dirty, clean): - db.run( 'DELETE FROM packages WHERE package_manager=%s AND name=%s' - , (dirty.package_manager, dirty.name) - ) - - -def update_package(db, dirty, clean): - db.run(''' - - UPDATE packages - SET readme_needs_to_be_processed=true - , readme_raw=%s - , readme_type=%s - WHERE package_manager=%s - AND name=%s - - ''', ( clean['readme'] - , 'x-markdown/marky' - , dirty.package_manager - , dirty.name - )) - - -def Fetcher(db, _fetch): - def fetch(dirty): - """Update all info for one package. 
- """ - log('fetching', dirty.name) - code, clean = _fetch(dirty.name) - - if code == 404: - log(dirty.name, 'is {}; deleting'.format(code)) - delete_package(db, dirty, clean) - return - - if code != 200: - assert clean is None - log(dirty.name, 'is {}; skipping'.format(code)) - return - - assert dirty.name == clean['name'] - - if 'readme' not in clean: - log(clean['name'], 'has no readme; adding an empty one') - clean['readme'] = '' - elif type(clean['readme']) is not unicode: - log(clean['name'], 'has a readme of type {} instead of unicode; ' - 'replacing with an empty one' - .format(type(clean['readme']))) - clean['readme'] = '' - - update_package(db, dirty, clean) - - return fetch - - -def main(env, args, db, sentrified, _fetch=fetch_from_public_registry): - """Populate ``readme_raw`` for all packages where ``readme_raw`` is null. - The ``readme_type`` is set to ``x-markdown/marky``, and - ``readme_needs_to_be_processed`` is set to ``true``. If the fetched package - is missing or malformed, we log the condition and continue. This runs in - four threads. - - """ - dirty = db.all('SELECT package_manager, name ' - 'FROM packages WHERE readme_raw IS NULL ' - 'ORDER BY package_manager DESC, name DESC') - threaded_map(sentrified(Fetcher(db, _fetch)), dirty, 4) diff --git a/gratipay/sync_npm/process_readmes.py b/gratipay/sync_npm/process_readmes.py deleted file mode 100644 index ec5b2efd24..0000000000 --- a/gratipay/sync_npm/process_readmes.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -"""Subcommand for processing readmes. -""" -from __future__ import absolute_import, division, print_function, unicode_literals - -from . import log -from ..utils import markdown -from ..utils.threaded_map import threaded_map - - -def Processor(db, _render): - def process(dirty): - """Processes the readme for a single package. 
- """ - log('processing', dirty.name) - raw = db.one( 'SELECT readme_raw FROM packages ' - 'WHERE package_manager=%s and name=%s and readme_needs_to_be_processed' - , (dirty.package_manager, dirty.name) - ) - if raw is None: - return - processed = _render(raw) - db.run(''' - - UPDATE packages - SET readme=%s - , readme_needs_to_be_processed=false - WHERE package_manager=%s - AND name=%s - - ''', ( processed - , dirty.package_manager - , dirty.name - )) - - return process - - -def main(env, args, db, sentrified, _render=markdown.render_like_npm): - """For all packages where ``readme_needs_to_be_processed`` is ``true``, run - ``readme_raw`` through ``marky-markdown`` and store the result in - ``readme``. Reset ``readme_needs_to_be_processed`` to ``false``. This runs - in four threads. - - """ - dirty = db.all('SELECT package_manager, name ' - 'FROM packages WHERE readme_needs_to_be_processed ' - 'ORDER BY package_manager DESC, name DESC') - threaded_map(sentrified(Processor(db, _render)), dirty, 4) diff --git a/gratipay/testing/__init__.py b/gratipay/testing/__init__.py index 190d55cb24..f2a41bb4d0 100644 --- a/gratipay/testing/__init__.py +++ b/gratipay/testing/__init__.py @@ -4,12 +4,8 @@ from decimal import Decimal -import pytest -from aspen import log_dammit - from ..models.participant import Participant from ..models.team import Team -from ..utils import markdown D = Decimal #: P = Participant.from_username #: @@ -50,17 +46,3 @@ def debug_http(): requests_log = logging.getLogger("requests.packages.urllib3") requests_log.setLevel(logging.DEBUG) requests_log.propagate = True - - -# Provide a decorator to skip tests when marky-markdown is missing. 
- -try: - markdown.render_like_npm('test') -except OSError as exc: - MISSING_MARKY_MARKDOWN = True - log_dammit('Will skip marky-markdown-related tests because:', exc.args[0]) -else: - MISSING_MARKY_MARKDOWN = False - -def skipif_missing_marky_markdown(func): - return pytest.mark.skipif(MISSING_MARKY_MARKDOWN, reason="missing marky-markdown")(func) diff --git a/gratipay/utils/markdown.py b/gratipay/utils/markdown.py index 8120824e93..d7098d3a27 100644 --- a/gratipay/utils/markdown.py +++ b/gratipay/utils/markdown.py @@ -1,8 +1,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from subprocess import Popen, PIPE -import json import misaka as m # http://misaka.61924.nl/ from markupsafe import Markup @@ -19,28 +17,3 @@ def render(markdown): extensions=m.EXT_AUTOLINK | m.EXT_STRIKETHROUGH | m.EXT_NO_INTRA_EMPHASIS, render_flags=m.HTML_SKIP_HTML | m.HTML_TOC | m.HTML_SMARTYPANTS | m.HTML_SAFELINK )) - - -def render_like_npm(markdown, package=None): - """Process markdown the same way npm does. - - Package should be a dict representing the package. If it includes `name` - and `description` then the first h1 and paragraph will have a - 'package-{name,description}-redundant' class added to them if they're - similar enough. If it includes `repository.url` then links will be changed - somehow. 
For details consult the docs and code: - - https://github.com/npm/marky-markdown - - """ - if type(markdown) is unicode: - markdown = markdown.encode('utf8') - cmd = ("bin/our-marky-markdown.js", "/dev/stdin") - if package: - cmd += (json.dumps(package),) - marky = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) - out, err = marky.communicate(markdown) - if marky.wait() > 0: - raise OSError(err) - return out - diff --git a/tests/py/test_markdown.py b/tests/py/test_markdown.py index 1c87fd933c..499d196795 100644 --- a/tests/py/test_markdown.py +++ b/tests/py/test_markdown.py @@ -1,10 +1,8 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from gratipay.testing import Harness, skipif_missing_marky_markdown +from gratipay.testing import Harness from gratipay.utils import markdown -from HTMLParser import HTMLParser - class TestMarkdown(Harness): @@ -55,23 +53,3 @@ def test_render_no_intra_emphasis(self): expected = '

<p>Examples like this_one and this other_one.</p>

\n' actual = markdown.render('Examples like this_one and this other_one.') assert expected == actual - - - # rln - render_like_npm - - @skipif_missing_marky_markdown - def test_rln_works(self): - md = "**Hello World!**" - actual = HTMLParser().unescape(markdown.render_like_npm(md)).strip() - expected = '

<p><strong>Hello World!</strong></p>

' - assert actual == expected - - @skipif_missing_marky_markdown - def test_rln_handles_npm_package(self): - md = "# Greetings, program!\nGreetings. Program." - pkg = {'name': 'greetings-program', 'description': 'Greetings, program.'} - actual = HTMLParser().unescape(markdown.render_like_npm(md, pkg)).strip() - expected = '''\ -

Greetings, program!

-

Greetings. Program.

''' - assert actual == expected diff --git a/tests/py/test_sync_npm.py b/tests/py/test_sync_npm.py index 347ecce27b..ca673e81ea 100644 --- a/tests/py/test_sync_npm.py +++ b/tests/py/test_sync_npm.py @@ -5,8 +5,7 @@ import pytest -from gratipay.testing import Harness, skipif_missing_marky_markdown -from gratipay.sync_npm import fetch_readmes, process_readmes +from gratipay.testing import Harness def load(raw): @@ -109,89 +108,3 @@ def test_sentrifier_starts_at_zero(self): def test_sentrifier_fail_fails(self): pytest.raises(RuntimeError, Sentrifier().fail) - - - # fr - fetch_readmes - - def make_package_without_readme_raw(self): - self.db.run("INSERT INTO packages (package_manager, name, description, emails) " - "VALUES ('npm', 'foo-package', 'A package', ARRAY[]::text[])") - - def test_fr_fetches_a_readme(self): - self.make_package_without_readme_raw() - - def fetch(name): - return 200, {'name': 'foo-package', 'readme': '# Greetings, program!'} - - fetch_readmes.main({}, [], self.db, lambda a: a, fetch) - - package = self.db.one('SELECT * FROM packages') - assert package.name == 'foo-package' - assert package.description == 'A package' - assert package.readme == '' - assert package.readme_needs_to_be_processed - assert package.readme_raw == '# Greetings, program!' 
- assert package.readme_type == 'x-markdown/marky' - assert package.emails == [] - - def test_fr_adds_empty_readme_as_needed(self): - self.make_package_without_readme_raw() - def fetch(name): - return 200, {'name': 'foo-package', 'redmeat': '# Greetings, program!'} - fetch_readmes.main({}, [], self.db, lambda a: a, fetch) - package = self.db.one('SELECT * FROM packages') - assert package.readme_raw == '' - - def test_fr_replaces_non_unicode_with_empty_readme(self): - self.make_package_without_readme_raw() - def fetch(name): - return 200, {'name': 'foo-package', 'readme': {'private': True}} - fetch_readmes.main({}, [], self.db, lambda a: a, fetch) - package = self.db.one('SELECT * FROM packages') - assert package.readme_raw == '' - - def test_fr_deletes_a_readme(self): - self.make_package_without_readme_raw() - fetch_readmes.main({}, [], self.db, lambda a: a, lambda n: (404, {})) - assert self.db.one('SELECT * FROM packages') is None - - def test_fr_tells_sentry_about_problems(self): - self.make_package_without_readme_raw() - sentrified = Sentrifier() - fetch_readmes.main({}, [], self.db, sentrified, sentrified.fail) - assert sentrified.ncalls == 1 - - - # pr - process_readmes - - def make_package_with_readme_raw(self): - self.db.run(''' - - INSERT - INTO packages (package_manager, name, description, readme_raw, readme_type, emails) - VALUES ('npm', 'foo-package', 'A package', '# Greetings, program!', 'x-markdown/marky', - ARRAY[]::text[]) - - ''') - - @skipif_missing_marky_markdown - def test_pr_processes_a_readme(self): - self.make_package_with_readme_raw() - - process_readmes.main({}, [], self.db, lambda a: a) - - package = self.db.one('SELECT * FROM packages') - assert package.name == 'foo-package' - assert package.description == 'A package' - assert package.readme == '

Greetings, program!

\n' - assert not package.readme_needs_to_be_processed - assert package.readme_raw == '# Greetings, program!' - assert package.readme_type == 'x-markdown/marky' - assert package.emails == [] - - - def test_pr_tells_sentry_about_problems(self): - self.make_package_with_readme_raw() - sentrified = Sentrifier() - process_readmes.main({}, [], self.db, sentrified, sentrified.fail) - assert sentrified.ncalls == 1