Skip to content
This repository has been archived by the owner on Feb 8, 2018. It is now read-only.

Commit

Permalink
Merge pull request #4164 from gratipay/marky-markdown-package
Browse files Browse the repository at this point in the history
Modify first h1/p just like npm does
  • Loading branch information
chadwhitacre authored Nov 9, 2016
2 parents bee3823 + d0b5b4f commit b7b85fa
Show file tree
Hide file tree
Showing 15 changed files with 264 additions and 110 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ before_install:
# Sometimes ya just halfta ...
- git clone https://github.com/lloyd/yajl.git && cd yajl && git checkout 2.1.0 && ./configure && sudo make install && cd ..

- npm install -g marky-markdown
- npm install marky-markdown
cache:
directories:
- env/bin
Expand All @@ -23,6 +23,7 @@ install:
- env/bin/pip install --upgrade ijson==2.3.0
before_script:
- echo "DATABASE_URL=dbname=gratipay" | tee -a tests/local.env local.env
- echo "REQUIRE_YAJL=true" | tee -a tests/local.env local.env
- psql -U postgres -c 'CREATE DATABASE "gratipay";'
- if [ "${TRAVIS_BRANCH}" = "master" -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then rm -rfv tests/py/fixtures; fi
script: LD_LIBRARY_PATH=/usr/local/lib make bgrun test doc
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pyflakes: env
$(env_bin)/pyflakes *.py bin gratipay tests

test: test-schema
$(py_test) --cov gratipay ./tests/
$(py_test) -vv --cov gratipay ./tests/
@$(MAKE) --no-print-directory pyflakes

pytest: env
Expand Down
21 changes: 21 additions & 0 deletions bin/our-marky-markdown.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env node
// Extend marky-markdown.js to support the package argument:
// https://www.npmjs.com/package/marky-markdown#npm-packages
//
// Usage: our-marky-markdown some.md [pkg] > some.html
//
// `pkg`, when given, is a JSON-encoded object that is handed to
// marky-markdown as its `package` option. The rendered HTML is written to
// stdout.

var fs = require('fs')
var path = require('path')
var marky = require('marky-markdown')

if (process.argv.length < 3) {
  // Usage goes to stderr with a non-zero status so that a caller capturing
  // stdout never mistakes the usage text for rendered HTML.
  console.error('Usage:\n\nour-marky-markdown some.md pkg > some.html')
  process.exit(1)
}

var filePath = path.resolve(process.cwd(), process.argv[2])

// Named `pkg` rather than `package`, which is a reserved word in strict mode.
// Parsed before the file is read so that a malformed argument fails fast.
var pkg = null
if (process.argv[3]) {
  try {
    pkg = JSON.parse(process.argv[3])
  } catch (e) {
    console.error('our-marky-markdown: pkg is not valid JSON: ' + e.message)
    process.exit(1)
  }
}

fs.readFile(filePath, function (err, data) {
  if (err) throw err
  var html = marky(data.toString(), {package: pkg})
  process.stdout.write(html)
})
7 changes: 7 additions & 0 deletions defaults.env
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ ASPEN_PROJECT_ROOT=.
ASPEN_SHOW_TRACEBACKS=yes
ASPEN_WWW_ROOT=www/

# This is used in our Procfile. (PORT is also used but is provided by
# Heroku; we don't set it ourselves in our app config.)
# https://github.com/benoitc/gunicorn/issues/186
GUNICORN_OPTS="--workers=1 --timeout=99999999"

Expand All @@ -96,5 +98,10 @@ TEAM_REVIEW_REPO=gratipay/test-gremlin
TEAM_REVIEW_USERNAME=
TEAM_REVIEW_TOKEN=

# anything Postgres can interpret as an interval
RESEND_VERIFICATION_THRESHOLD="3 minutes"

RAISE_SIGNIN_NOTIFICATIONS=no

# speeds up npm syncing; should be true on production and Travis
REQUIRE_YAJL=false
55 changes: 45 additions & 10 deletions gratipay/package_managers/readmes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def http_fetch(package_name):
return r.json()


def Syncer(db):
def sync(dirty, fetch=http_fetch):
def Fetcher(db):
def fetch(dirty, fetch=http_fetch):
"""Update all info for one package.
"""
log(dirty.name)
Expand All @@ -43,23 +43,58 @@ def sync(dirty, fetch=http_fetch):
db.run('''
UPDATE packages
SET readme=%s
SET readme_needs_to_be_processed=true
, readme_raw=%s
, readme_type=%s
WHERE package_manager=%s
AND name=%s
''', ( markdown.marky(full['readme'])
, full['readme']
, 'x-markdown/npm'
''', ( full['readme']
, 'x-markdown/marky'
, dirty.package_manager
, dirty.name
))

return fetch


def Processor(db):
    """Return a ``process(dirty)`` callable bound to ``db``."""

    def process(dirty):
        """Render the raw readme for a single package and store the result.

        ``dirty`` must expose ``package_manager`` and ``name`` attributes.
        The raw readme is only selected while the row is still flagged
        ``readme_needs_to_be_processed``; when the lookup yields ``None``
        nothing is written. Otherwise the rendered readme is saved and the
        flag is cleared in the same UPDATE.
        """
        log(dirty.name)
        raw = db.one( 'SELECT readme_raw FROM packages '
                      'WHERE package_manager=%s and name=%s and readme_needs_to_be_processed'
                    , (dirty.package_manager, dirty.name)
                     )
        if raw is None:
            return
        processed = markdown.render_like_npm(raw)
        db.run('''
            UPDATE packages
               SET readme=%s
                 , readme_needs_to_be_processed=false
             WHERE package_manager=%s
               AND name=%s
        ''', ( processed
             , dirty.package_manager
             , dirty.name
              ))

    return process


def fetch(db):
    """Run a ``Fetcher`` over every package whose ``readme_raw`` is NULL."""
    query = ('SELECT package_manager, name '
             'FROM packages WHERE readme_raw IS NULL '
             'ORDER BY package_manager DESC, name DESC')
    packages_without_readme = db.all(query)
    threaded_map(Fetcher(db), packages_without_readme, 4)


def sync_all(db):
dirty = db.all('SELECT package_manager, name FROM packages WHERE readme_raw IS NULL '
def process(db):
    """Run a ``Processor`` over every package flagged for readme processing.

    Selects the rows where ``readme_needs_to_be_processed`` is true and maps
    ``Processor(db)`` over them with ``threaded_map``.
    """
    # Adjacent string literals are concatenated verbatim, so every fragment
    # but the last must end with a space; without it the WHERE clause runs
    # straight into ORDER BY and Postgres rejects the query.
    dirty = db.all('SELECT id, package_manager, name, description, readme_raw '
                   'FROM packages WHERE readme_needs_to_be_processed '
                   'ORDER BY package_manager DESC, name DESC')
    threaded_map(Processor(db), dirty, 4)
40 changes: 27 additions & 13 deletions gratipay/package_managers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,21 @@
import time
import uuid

from gratipay import wireup
from gratipay.package_managers import readmes as _readmes


log = lambda *a: print(*a, file=sys.stderr)
NULL = uuid.uuid4().hex


def import_ijson():
import ijson.backends.yajl2_cffi as ijson
# helpers

def import_ijson(env):
    """Import and return the ijson module selected by ``env.require_yajl``.

    When ``env.require_yajl`` is true the ``ijson.backends.yajl2_cffi``
    backend is imported (and the import fails if it is unavailable);
    otherwise the top-level ``ijson`` package is returned.
    """
    if not env.require_yajl:
        import ijson as backend
        return backend
    import ijson.backends.yajl2_cffi as backend
    return backend


Expand Down Expand Up @@ -50,10 +56,12 @@ def serialize_one(out, package):
return 1


def serialize(args):
# cli subcommands

def serialize(env, args, _):
"""Consume raw JSON from the npm registry and spit out CSV for Postgres.
"""
ijson = import_ijson()
ijson = import_ijson(env)

path = args.path
parser = ijson.parse(open(path))
Expand Down Expand Up @@ -97,9 +105,9 @@ def log_stats():
log_stats()


def upsert(args):
from gratipay import wireup
db = wireup.db(wireup.env())
def upsert(env, args, db):
"""Take a CSV file from stdin and load it into Postgres.
"""
fp = open(args.path)
with db.get_cursor() as cursor:
assert cursor.connection.encoding == 'UTF8'
Expand Down Expand Up @@ -128,19 +136,25 @@ def upsert(args):
""")


def readmes(args):
from gratipay import wireup
db = wireup.db(wireup.env())
_readmes.sync_all(db)
def fetch_readmes(env, args, db):
    """CLI subcommand ``fetch-readmes``: hand ``db`` to ``_readmes.fetch``.

    ``env`` and ``args`` are part of the shared subcommand signature and are
    not used here.
    """
    _readmes.fetch(db)


def process_readmes(env, args, db):
    """CLI subcommand ``process-readmes``: hand ``db`` to ``_readmes.process``.

    ``env`` and ``args`` are part of the shared subcommand signature and are
    not used here.
    """
    _readmes.process(db)


# cli plumbing

def parse_args(argv):
    """Parse the command-line arguments (program name already stripped).

    Returns an ``argparse.Namespace`` with:

    - ``command``: one of ``serialize``, ``upsert``, ``fetch-readmes`` or
      ``process-readmes``
    - ``path``: the input file, defaulting to ``/dev/stdin``
    """
    parser = argparse.ArgumentParser()
    # Exactly one `command` positional; a second one carrying the old choices
    # list would swallow the first argument and reject the new subcommands.
    parser.add_argument( 'command'
                       , choices=['serialize', 'upsert', 'fetch-readmes', 'process-readmes']
                        )
    parser.add_argument( 'path'
                       , help='the path to the input file'
                       , nargs='?'
                       , default='/dev/stdin'
                        )
    return parser.parse_args(argv)


def main(argv=sys.argv):
    """Entry point: parse ``argv`` and run the selected subcommand.

    Every subcommand is a module-level function called as
    ``subcommand(env, args, db)``.
    """
    env = wireup.env()
    args = parse_args(argv[1:])
    db = wireup.db(env)
    # Command names are dashed on the command line but the functions that
    # implement them use underscores, so normalize before the lookup.
    subcommand = globals()[args.command.replace('-', '_')]
    subcommand(env, args, db)
23 changes: 21 additions & 2 deletions gratipay/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from decimal import Decimal
from gratipay.models.participant import Participant
from gratipay.models.team import Team

import pytest
from aspen import log_dammit

from ..models.participant import Participant
from ..models.team import Team
from ..utils import markdown

D = Decimal #:
P = Participant.from_username #:
Expand Down Expand Up @@ -45,3 +50,17 @@ def debug_http():
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True


# Provide a decorator to skip tests when marky-markdown is missing.

# Probe once at import time: render a trivial document and see whether the
# marky-markdown helper can be run at all.
MISSING_MARKY_MARKDOWN = False
try:
    markdown.render_like_npm('test')
except OSError as exc:
    MISSING_MARKY_MARKDOWN = True
    log_dammit('Will skip marky-markdown-related tests because:', exc.args[0])

def skipif_missing_marky_markdown(func):
    """Decorate ``func`` so pytest skips it when ``MISSING_MARKY_MARKDOWN`` is set."""
    skip_marker = pytest.mark.skipif(MISSING_MARKY_MARKDOWN, reason="missing marky-markdown")
    return skip_marker(func)
33 changes: 29 additions & 4 deletions gratipay/utils/markdown.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,46 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from subprocess import Popen, PIPE

from markupsafe import Markup
import json
import misaka as m # http://misaka.61924.nl/
from markupsafe import Markup


def render(markdown):
    """Render markdown to HTML roughly the way GitHub used to.

    (As of November, 2016 GitHub is migrating to CommonMark, so our output is
    starting to drift from theirs.)

    Returns the HTML wrapped in ``Markup``.
    """
    extensions = m.EXT_AUTOLINK | m.EXT_STRIKETHROUGH | m.EXT_NO_INTRA_EMPHASIS
    render_flags = m.HTML_SKIP_HTML | m.HTML_TOC | m.HTML_SMARTYPANTS | m.HTML_SAFELINK
    html = m.html(markdown, extensions=extensions, render_flags=render_flags)
    return Markup(html)


def marky(markdown):
def render_like_npm(markdown, package=None):
    """Process markdown the same way npm does.

    Package should be a dict representing the package. If it includes `name`
    and `description` then the first h1 and paragraph will have a
    'package-{name,description}-redundant' class added to them if they're
    similar enough. If it includes `repository.url` then links will be changed
    somehow. For details consult the docs and code:

    https://github.com/npm/marky-markdown

    The markdown is piped to ``bin/our-marky-markdown.js`` (a path relative to
    the current working directory). Returns whatever the script wrote to
    stdout. Raises ``OSError`` carrying the script's stderr output when the
    script does not exit cleanly.
    """
    if isinstance(markdown, unicode):
        markdown = markdown.encode('utf8')
    cmd = ("bin/our-marky-markdown.js", "/dev/stdin")
    if package:
        cmd += (json.dumps(package),)
    marky = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
    out, err = marky.communicate(markdown)
    # communicate() has already waited for the child. The return code is
    # negative when the child was killed by a signal, so compare against zero
    # instead of checking for a positive value.
    if marky.returncode != 0:
        raise OSError(err)
    return out

6 changes: 2 additions & 4 deletions gratipay/wireup.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,10 +432,8 @@ def env():
TEAM_REVIEW_USERNAME = unicode,
TEAM_REVIEW_TOKEN = unicode,
RAISE_SIGNIN_NOTIFICATIONS = is_yesish,
RESEND_VERIFICATION_THRESHOLD = unicode, # anything Postgres can interpret as an interval

# This is used in our Procfile. (PORT is also used but is provided by
# Heroku; we don't set it ourselves in our app config.)
RESEND_VERIFICATION_THRESHOLD = unicode,
REQUIRE_YAJL = is_yesish,
GUNICORN_OPTS = unicode,
)

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,6 @@
./vendor/ipaddress-1.0.16.tar.gz
./vendor/cryptography-1.3.2.tar.gz

./vendor/ijson-2.3.tar.gz

-e .
1 change: 1 addition & 0 deletions sql/branch.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Flag for packages whose readme_raw still has to be rendered into readme.
-- NOT NULL DEFAULT true means every existing row starts out flagged.
ALTER TABLE packages ADD COLUMN readme_needs_to_be_processed bool NOT NULL DEFAULT true;
Loading

0 comments on commit b7b85fa

Please sign in to comment.