diff --git a/.gitignore b/.gitignore index ea4f3f30..73247c9e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ /.eggs/ /*.egg-info /l +/l3 /local/ /local3/ circleci_token diff --git a/README.md b/README.md index 598335e2..fe709ce5 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ Granary is a library and REST API that fetches and converts between a wide varie Here's how to get started: * Granary is [available on PyPi.](https://pypi.python.org/pypi/granary/) Install with `pip install granary`. +* Supports Python 2.7+ and 3.3+. * [Click here for getting started docs.](#using) * [Click here for reference docs.](https://granary.readthedocs.io/en/latest/source/granary.html) * The REST API and demo app are deployed at [granary.io](https://granary.io/). @@ -223,8 +224,12 @@ On the open source side, there are many related projects. [php-mf2-shim](https:/ Changelog --- ### 1.12 - unreleased +* Add Python 3 support! Granary now supports Python 2.7+ and 3.3+. * Instagram: * Fix scraping profile pages. + +This release is intentionally small and limited in scope to contain any impact of the Python 3 migration. It *should* be a noop for existing users, and we've tested thoroughly, but I'm sure there are still bugs. Please file issues if you notice anything broken! + ### 1.11 - 2018-03-09 * Add GitHub! * `get_activities()` supports issues and pull requests, including comments and reactions. It's currently based on notifications, so it's best effort, not comprehensive, and only includes recently active issues/PRs. diff --git a/granary/as2.py b/granary/as2.py index 78655b3b..70e5d5a8 100644 --- a/granary/as2.py +++ b/granary/as2.py @@ -5,6 +5,9 @@ AS1: http://activitystrea.ms/specs/json/1.0/ http://activitystrea.ms/specs/json/schema/activity-schema.html """ +from __future__ import unicode_literals +from past.builtins import basestring + import copy import logging @@ -105,7 +108,7 @@ def all_from_as1(field, type=None): obj['location'] = from_as1(loc, type='Place', context=None) obj = util.trim_nulls(obj) - if obj.keys() == ['url']: + if list(obj.keys()) == ['url']: return obj['url'] return obj diff --git a/granary/atom.py b/granary/atom.py index 96fdb92c..6e8e9341 100644 --- a/granary/atom.py +++ b/granary/atom.py @@ -2,11 +2,16 @@ Atom spec: https://tools.ietf.org/html/rfc4287 (RIP atomenabled.org) """ +from __future__ import absolute_import, unicode_literals +from future import standard_library +standard_library.install_aliases() +from builtins import str + import collections import mimetypes import os import re -import urlparse +import urllib.parse from xml.etree import ElementTree import xml.sax.saxutils @@ -16,8 +21,8 @@ import mf2util from oauth_dropins.webutil import util -import microformats2 -import source +from . import microformats2 +from . 
import source FEED_TEMPLATE = 'user_feed.atom' ENTRY_TEMPLATE = 'entry.atom' @@ -60,9 +65,10 @@ def _text(elem, field=None): if ':' not in field: field = 'atom:' + field elem = elem.find(field, NAMESPACES) + if elem is not None and elem.text: text = elem.text - if not isinstance(elem.text, unicode): + if not isinstance(elem.text, str): text = text.decode('utf-8') return text.strip() @@ -96,7 +102,9 @@ def __init__(self, **kwargs): for k, v in kwargs.items()}) def __unicode__(self): - return super(Defaulter, self).__unicode__() if self else u'' + return super(Defaulter, self).__unicode__() if self else '' + + __str__ = __unicode__ def __hash__(self): return super(Defaulter, self).__hash__() if self else None.__hash__() @@ -177,7 +185,7 @@ def atom_to_activities(atom): Returns: list of ActivityStreams activity dicts """ - assert isinstance(atom, unicode) + assert isinstance(atom, str) parser = ElementTree.XMLParser(encoding='UTF-8') feed = ElementTree.XML(atom.encode('utf-8'), parser=parser) if _tag(feed) != 'feed': @@ -194,7 +202,7 @@ def atom_to_activity(atom): Returns: dict, ActivityStreams activity """ - assert isinstance(atom, unicode) + assert isinstance(atom, str) parser = ElementTree.XMLParser(encoding='UTF-8') entry = ElementTree.XML(atom.encode('utf-8'), parser=parser) if _tag(entry) != 'entry': @@ -385,10 +393,10 @@ def _prepare_activity(a, reader=True): if not image: continue url = image.get('url') - parsed = urlparse.urlparse(url) + parsed = urllib.parse.urlparse(url) scheme = parsed.scheme netloc = parsed.netloc - rest = urlparse.urlunparse(('', '') + parsed[2:]) + rest = urllib.parse.urlunparse(('', '') + parsed[2:]) img_src_re = re.compile(r"""src *= *['"] *((https?:)?//%s)?%s *['"]""" % (re.escape(netloc), re.escape(rest))) if (url and url not in image_urls_seen and @@ -410,6 +418,6 @@ def _prepare_activity(a, reader=True): obj[prop] += 'Z' def _remove_query_params(url): - parsed = list(urlparse.urlparse(url)) + parsed = list(urllib.parse.urlparse(url)) parsed[4] = '' - return urlparse.urlunparse(parsed) + return urllib.parse.urlunparse(parsed) diff --git a/granary/facebook.py b/granary/facebook.py index ed47914d..e73449fe 100644 --- a/granary/facebook.py +++ b/granary/facebook.py @@ -10,8 +10,7 @@ Retrieving @all activities from :meth:`get_activities()` (the default) currently returns an incomplete set of activities, ie *NOT* exactly the same set as your Facebook News Feed: https://www.facebook.com/help/327131014036297/ -""" -""" + This is complicated, and I still don't fully understand how or why they differ, but based on lots of experimenting and searching, it sounds like the current state is that you just can't reproduce the News Feed via Graph API's /me/home, @@ -42,20 +41,23 @@ See the fql_stream_to_post() method below for code I used to experiment with the FQL stream table. """ +from __future__ import absolute_import, division, unicode_literals +from future import standard_library +standard_library.install_aliases() +from builtins import range, str, zip + import collections import copy import itertools import json import logging import re -import urllib -import urllib2 -import urlparse +import urllib.error, urllib.parse, urllib.request import mf2util -import appengine_config +from . import appengine_config from oauth_dropins.webutil import util -import source +from . import source # Since API v2.4, we need to explicitly ask for the fields we want from most API # endpoints with ?fields=... 
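The import hunks above (atom.py, facebook.py) and in the modules below all lean on the same dual-Python idiom from the python-future package: `standard_library.install_aliases()` makes Python 3 module names like `urllib.parse` importable on Python 2, and `except ... as e` replaces the removed `except ..., e` syntax. Here's a minimal, self-contained sketch of that idiom, assuming `pip install future` (the `fetch()` helper and its parameters are made up for illustration; this is not granary code):

```python
# Sketch of the py2+3 compatibility idiom used throughout this diff.
# Assumes the python-future package (pip install future) on Python 2;
# on Python 3, install_aliases() is effectively a no-op.
from __future__ import absolute_import, unicode_literals
from future import standard_library
standard_library.install_aliases()  # maps urllib.request/parse/error onto py2's urllib/urllib2/urlparse

import urllib.error
import urllib.parse
import urllib.request

def fetch(url, params):
  """Fetches url?params and returns the response body, or None on HTTP 304."""
  full_url = url + '?' + urllib.parse.urlencode(params)
  try:
    return urllib.request.urlopen(urllib.request.Request(full_url)).read()
  except urllib.error.HTTPError as e:  # py3 syntax; py2's "except X, e" is gone
    if e.code == 304:  # Not Modified, eg from a matching ETag
      return None
    raise
```
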
@@ -176,13 +178,13 @@ } # https://developers.facebook.com/docs/graph-api/reference/post/reactions REACTION_CONTENT = { - 'LOVE': u'❤️', - 'WOW': u'😮', - 'HAHA': u'😆', - 'SAD': u'😢', - 'ANGRY': u'😡', - 'THANKFUL': u'🌼', # https://github.com/snarfed/bridgy/issues/748 - 'PRIDE': u'🏳️‍🌈', + 'LOVE': '❤️', + 'WOW': '😮', + 'HAHA': '😆', + 'SAD': '😢', + 'ANGRY': '😡', + 'THANKFUL': '🌼', # https://github.com/snarfed/bridgy/issues/748 + 'PRIDE': '🏳️‍🌈', # nothing for LIKE (it's a like :P) or for NONE } @@ -306,7 +308,7 @@ def get_activities_response(self, user_id=None, group_id=None, app_id=None, resp = self.urlopen(url, headers=headers, _as=None) etag = resp.info().get('ETag') posts = self._as(list, source.load_json(resp.read(), url)) - except urllib2.HTTPError, e: + except urllib.error.HTTPError as e: if e.code == 304: # Not Modified, from a matching ETag posts = [] else: @@ -432,7 +434,7 @@ def _merge_photos(self, posts, user_id): photo['privacy'] = 'custom' # ie unknown return ([p for p in posts if not p.get('object_id')] + - posts_by_obj_id.values() + photos) + list(posts_by_obj_id.values()) + photos) def _split_id_requests(self, api_call, ids): """Splits an API call into multiple to stay under the MAX_IDS limit per call. @@ -512,7 +514,7 @@ def get_comment(self, comment_id, activity_id=None, activity_author_id=None, """ try: resp = self.urlopen(API_COMMENT % comment_id) - except urllib2.HTTPError, e: + except urllib.error.HTTPError as e: if e.code == 400 and '_' in comment_id: # Facebook may want us to ask for this without the other prefixed id(s) resp = self.urlopen(API_COMMENT % comment_id.split('_')[-1]) @@ -680,7 +682,7 @@ def _create(self, obj, preview=None, include_link=source.OMIT_LINK, '<a href="%s">%s</a>' % ( tag.get('url'), tag.get('displayName') or 'User %s' % tag['id']) for tag in people) - msg_data = {'message': content.encode('utf-8')} + msg_data = collections.OrderedDict({'message': content.encode('utf-8')}) if appengine_config.DEBUG: msg_data['privacy'] = json.dumps({'value': 'SELF'}) @@ -703,7 +705,7 @@ def _create(self, obj, preview=None, include_link=source.OMIT_LINK, if image_url: msg_data['attachment_url'] = image_url resp = self.urlopen(API_PUBLISH_COMMENT % base_id, - data=urllib.urlencode(util.encode(msg_data))) + data=urllib.parse.urlencode(msg_data)) url = self.comment_url(base_id, resp['id'], post_author_id=base_obj.get('author', {}).get('id')) resp.update({'url': url, 'type': 'comment'}) @@ -812,7 +814,7 @@ def _create(self, obj, preview=None, include_link=source.OMIT_LINK, # https://developers.facebook.com/docs/graph-api/reference/user/feed#pubfields msg_data['tags'] = ','.join(tag['id'] for tag in people) - resp = self.urlopen(api_call, data=urllib.urlencode(util.encode(msg_data))) + resp = self.urlopen(api_call, data=urllib.parse.urlencode(msg_data)) resp.update({'url': self.post_url(resp), 'type': 'post'}) if video_url and not resp.get('success', True): msg = 'Video upload failed.' @@ -858,7 +860,7 @@ def _get_person_tags(self, obj): tag['id'] = id people[id] = tag - return people.values() + return list(people.values()) def create_notification(self, user_id, text, link): """Sends the authenticated user a notification. 
@@ -884,7 +886,7 @@ def create_notification(self, user_id, text, link): appengine_config.FACEBOOK_APP_SECRET), } url = API_BASE + API_NOTIFICATION % user_id - resp = util.urlopen(urllib2.Request(url, data=urllib.urlencode(params))) + resp = util.urlopen(urllib.request.Request(url, data=urllib.parse.urlencode(params))) logging.debug('Response: %s %s', resp.getcode(), resp.read()) def post_url(self, post): @@ -960,8 +962,8 @@ def base_object(self, obj, verb=None, resolve_numeric_id=False): base_obj = self.user_to_actor(self.urlopen(base_id)) try: - parsed = urlparse.urlparse(url) - params = urlparse.parse_qs(parsed.query) + parsed = urllib.parse.urlparse(url) + params = urllib.parse.parse_qs(parsed.query) assert parsed.path.startswith('/') path = parsed.path.strip('/') path_parts = path.split('/') @@ -1022,7 +1024,7 @@ def base_object(self, obj, verb=None, resolve_numeric_id=False): # add author user id prefix. https://github.com/snarfed/bridgy/issues/229 base_obj['id'] = '%s_%s' % (author['numeric_id'], base_id) - except BaseException, e: + except BaseException as e: logging.exception( "Couldn't parse object URL %s : %s. Falling back to default logic.", url, e) @@ -1759,7 +1761,7 @@ def urlopen(self, url, _as=dict, **kwargs): log_url = url if self.access_token: url = util.add_query_params(url, [('access_token', self.access_token)]) - resp = util.urlopen(urllib2.Request(url, **kwargs)) + resp = util.urlopen(urllib.request.Request(url, **kwargs)) if _as is None: return resp @@ -1819,8 +1821,8 @@ def urlopen_batch(self, urls): for url, resp in zip(urls, resps): code = int(resp.get('code', 0)) body = resp.get('body') - if code / 100 in (4, 5): - raise urllib2.HTTPError(url, code, body, resp.get('headers'), None) + if code // 100 in (4, 5): + raise urllib.error.HTTPError(url, code, body, resp.get('headers'), None) bodies.append(body) return bodies @@ -1863,7 +1865,7 @@ def urlopen_batch_full(self, requests): req['headers'] = [{'name': n, 'value': v} for n, v in req['headers'].items()] - data = 'batch=' + json.dumps(util.trim_nulls(requests), + data = 'batch=' + json.dumps(util.trim_nulls(requests), sort_keys=True, separators=(',', ':')) # no whitespace resps = self.urlopen('', data=data, _as=list) diff --git a/granary/flickr.py b/granary/flickr.py index c1b7aa7e..4da5cc96 100644 --- a/granary/flickr.py +++ b/granary/flickr.py @@ -10,6 +10,12 @@ (https://www.flickr.com/services/api/flickr.activity.userPhotos.html) when group_id=SELF. """ +from __future__ import absolute_import, unicode_literals +from future import standard_library +standard_library.install_aliases() +from builtins import next, str +from past.builtins import basestring + import copy import datetime import functools @@ -17,20 +23,19 @@ import json import logging import requests -import source import sys import mf2py import mf2util -import urllib2 -import urlparse +import urllib.error, urllib.parse, urllib.request -import appengine_config -from oauth_dropins.webutil import util -from oauth_dropins import flickr_auth +from . import appengine_config from apiclient.errors import HttpError from apiclient.http import BatchHttpRequest +from oauth_dropins.webutil import util +from oauth_dropins import flickr_auth +from . import source class Flickr(source.Source): """Flickr source class. 
See file docstring and Source class for details.""" @@ -185,15 +190,14 @@ def _create(self, obj, preview, include_link=source.OMIT_LINK, if content: params.append(('description', content.encode('utf-8'))) if hashtags: - params.append( - ('tags', ','.join(('"%s"' % t if ' ' in t else t).encode('utf-8') - for t in hashtags))) + params.append(('tags', ','.join(('"%s"' % t if ' ' in t else t) + for t in hashtags))) file = util.urlopen(video_url or image_url) try: resp = self.upload(params, file) except requests.exceptions.ConnectionError as e: - if unicode(e.args[0].message).startswith('Request exceeds 10 MiB limit'): + if str(e.args[0]).startswith('Request exceeds 10 MiB limit'): msg = 'Sorry, photos and videos must be under 10MB.' return source.creation_result(error_plain=msg, error_html=msg) else: @@ -311,7 +315,7 @@ def _get_person_tags(self, obj): tag = copy.copy(tag) tag['id'] = id people[id] = tag - return people.values() + return list(people.values()) def get_activities_response(self, user_id=None, group_id=None, app_id=None, activity_id=None, start_index=0, count=0, @@ -441,7 +445,7 @@ def user_to_actor(self, resp): obj['url'] = next( (u for u in urls if not u.startswith('https://www.flickr.com/')), None) - except urllib2.URLError, e: + except urllib.error.URLError as e: logging.warning('could not fetch user homepage %s', profile_url) return self.postprocess_object(obj) @@ -511,7 +515,7 @@ def photo_to_activity(self, photo): 'url': photo_permalink, 'id': self.tag_uri(photo.get('id')), 'image': { - 'url': u'https://farm{}.staticflickr.com/{}/{}_{}_{}.jpg'.format( + 'url': 'https://farm{}.staticflickr.com/{}/{}_{}_{}.jpg'.format( photo.get('farm'), photo.get('server'), photo.get('id'), photo.get('secret'), 'b'), }, @@ -554,14 +558,14 @@ def photo_to_activity(self, photo): activity['object']['tags'] = [{ 'objectType': 'hashtag', 'id': self.tag_uri(tag.get('id')), - 'url': u'https://www.flickr.com/search?tags={}'.format( + 'url': 'https://www.flickr.com/search?tags={}'.format( tag.get('_content')), 'displayName': tag.get('raw'), } for tag in photo.get('tags', {}).get('tag', [])] elif isinstance(photo.get('tags'), basestring): activity['object']['tags'] = [{ 'objectType': 'hashtag', - 'url': u'https://www.flickr.com/search?tags={}'.format( + 'url': 'https://www.flickr.com/search?tags={}'.format( tag.strip()), 'displayName': tag.strip(), } for tag in photo.get('tags').split(' ') if tag.strip()] @@ -605,10 +609,10 @@ def like_to_object(self, person, photo_activity): }, }, 'created': util.maybe_timestamp_to_rfc3339(photo_activity.get('favedate')), - 'url': u'{}#liked-by-{}'.format( + 'url': '{}#liked-by-{}'.format( photo_activity.get('url'), person.get('nsid')), 'object': {'url': photo_activity.get('url')}, - 'id': self.tag_uri(u'{}_liked_by_{}'.format( + 'id': self.tag_uri('{}_liked_by_{}'.format( photo_activity.get('flickr_id'), person.get('nsid'))), 'objectType': 'activity', 'verb': 'like', @@ -655,8 +659,8 @@ def get_user_image(self, farm, server, author): ref: https://www.flickr.com/services/api/misc.buddyicons.html """ if server == 0: - return u'https://www.flickr.com/images/buddyicon.gif' - return u'https://farm{}.staticflickr.com/{}/buddyicons/{}.jpg'.format( + return 'https://www.flickr.com/images/buddyicon.gif' + return 'https://farm{}.staticflickr.com/{}/buddyicons/{}.jpg'.format( farm, server, author) def user_id(self): @@ -709,13 +713,13 @@ def photo_url(self, user_id, photo_id): Returns: string, the photo URL """ - return u'https://www.flickr.com/photos/%s/%s/' % (user_id, 
photo_id) + return 'https://www.flickr.com/photos/%s/%s/' % (user_id, photo_id) @classmethod def base_id(cls, url): """Used when publishing comments or favorites. Flickr photo ID is the 3rd path component rather than the first. """ - parts = urlparse.urlparse(url).path.split('/') + parts = urllib.parse.urlparse(url).path.split('/') if len(parts) >= 4 and parts[1] == 'photos': return parts[3] diff --git a/granary/googleplus.py b/granary/googleplus.py index be328f36..870d7ebb 100644 --- a/granary/googleplus.py +++ b/granary/googleplus.py @@ -4,14 +4,17 @@ Audience Targeting 'to' field is always set to @public. https://developers.google.com/+/api/latest/activities/list#collection """ +from __future__ import absolute_import, division, unicode_literals +from builtins import range + import datetime import functools import itertools import json import re -import appengine_config -import source +from . import appengine_config +from . import source from apiclient.errors import HttpError from apiclient.http import BatchHttpRequest @@ -128,7 +131,7 @@ def request_with_etag(*args, **kwargs): resp = call.execute(http=http) activities = resp.get('items', []) etag = resp.get('etag') - except HttpError, e: + except HttpError as e: if e.resp.status == 304: # Not Modified, from a matching ETag activities = [] else: @@ -328,14 +331,14 @@ def html_to_activities(self, html): html = re.sub(r'([,[])\s*([],])', r'\1null\2', html) data = json.loads(html)[0][7] - data = [d[6].values()[0] for d in data if len(d) >= 7 and d[6]] + data = [list(d[6].values())[0] for d in data if len(d) >= 7 and d[6]] activities = [] for d in data: id = self.tag_uri(d[8]) url = 'https://%s/%s' % (self.DOMAIN, d[21]) # d[132] is full url # posix timestamp in ms - published = datetime.datetime.utcfromtimestamp(d[5] / 1000).isoformat('T') + 'Z' if d[69] and len(d[69]) >= 2 and d[69][1] and d[69][1][0]: # this is a like, reshare, etc diff --git a/granary/instagram.py b/granary/instagram.py index 3bb26f18..3fb4f530 100644 --- a/granary/instagram.py +++ b/granary/instagram.py @@ -6,6 +6,11 @@ https://groups.google.com/forum/m/#!topic/instagram-api-developers/DAO7OriVFsw https://groups.google.com/forum/#!searchin/instagram-api-developers/private """ +from __future__ import absolute_import, unicode_literals +from future import standard_library +standard_library.install_aliases() +from past.builtins import basestring + import datetime import itertools import json @@ -13,16 +18,14 @@ import operator import re import string -import urllib -import urllib2 -import urlparse +import urllib.error, urllib.parse, urllib.request import xml.sax.saxutils -import appengine_config +from . import appengine_config from bs4 import BeautifulSoup from oauth_dropins.webutil import util import requests -import source +from . import source # Maps Instagram media type to ActivityStreams objectType. 
OBJECT_TYPES = {'image': 'photo', 'video': 'video'} @@ -87,11 +90,10 @@ def __init__(self, access_token=None, allow_comment_creation=False, scrape=False def urlopen(self, url, **kwargs): """Wraps :func:`urllib2.urlopen()` and passes through the access token.""" - log_url = url if self.access_token: # TODO add access_token to the data parameter for POST requests url = util.add_query_params(url, [('access_token', self.access_token)]) - resp = util.urlopen(urllib2.Request(url, **kwargs)) + resp = util.urlopen(urllib.request.Request(url, **kwargs)) return (resp if kwargs.get('data') else source.load_json(resp.read(), url).get('data')) @@ -219,7 +221,7 @@ def get_activities_response(self, user_id=None, group_id=None, app_id=None, activities += [self.like_to_object(user, l['id'], l['link']) for l in liked] - except urllib2.HTTPError, e: + except urllib.error.HTTPError as e: code, body = util.interpret_http_exception(e) # instagram api should give us back a json block describing the # error. but if it's an error for some other reason, it probably won't @@ -412,14 +414,14 @@ def _create(self, obj, include_link=source.OMIT_LINK, preview=None, abort=True, error_plain='Cannot publish comments on Instagram', error_html='Cannot publish comments on Instagram. The Instagram API technically supports creating comments, but anecdotal evidence suggests they are very selective about which applications they approve to do so.') - content = self._content_for_create(obj).encode('utf-8') + content = self._content_for_create(obj) if preview: return source.creation_result( content=content, description='<span class="verb">comment</span> on ' '<a href="%s">this post</a>:\n%s' % (base_url, self.embed_post(base_obj))) - self.urlopen(API_COMMENT_URL % base_id, data=urllib.urlencode({ + self.urlopen(API_COMMENT_URL % base_id, data=urllib.parse.urlencode({ 'access_token': self.access_token, 'text': content, })) @@ -453,7 +455,7 @@ def _create(self, obj, include_link=source.OMIT_LINK, preview=None, logging.info('posting like for media id id=%s, url=%s', base_id, base_url) # no response other than success/failure - self.urlopen(API_MEDIA_LIKES_URL % base_id, data=urllib.urlencode({ + self.urlopen(API_MEDIA_LIKES_URL % base_id, data=urllib.parse.urlencode({ 'access_token': self.access_token })) # TODO use the stored user_json rather than looking it up each time. 
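Several hunks in this diff swap plain dicts for `collections.OrderedDict`s or `(key, value)` tuple sequences before urlencoding them (facebook.py's `_create()` above, and the `expect_urlopen(..., data=urllib.parse.urlencode((...)))` expectations in test_facebook.py below). The reason: dict iteration order differs between Python 2 and Python 3, and is arbitrary before 3.7, so a POST body built by urlencoding a plain dict isn't byte-for-byte stable, which breaks tests that expect an exact encoded string. A small illustrative sketch (the URLs and captions are made up):

```python
# Why urlencode() inputs switch from dicts to ordered containers in this diff.
import collections
import urllib.parse

# A plain dict's iteration order varies across interpreters, so this body may
# come out in either order -- bad for tests that expect an exact string:
urllib.parse.urlencode({'url': 'http://my/picture', 'message': 'my caption'})

# A sequence of (key, value) tuples pins the order explicitly:
stable = urllib.parse.urlencode(
  (('message', 'my caption'), ('url', 'http://my/picture')))
assert stable == 'message=my+caption&url=http%3A%2F%2Fmy%2Fpicture'

# So does an OrderedDict, when the call site builds params incrementally:
params = collections.OrderedDict([('message', 'my caption')])
params['url'] = 'http://my/picture'
assert urllib.parse.urlencode(params) == stable
```
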
diff --git a/granary/jsonfeed.py b/granary/jsonfeed.py index 3f4b9bf5..86a74930 100644 --- a/granary/jsonfeed.py +++ b/granary/jsonfeed.py @@ -2,6 +2,10 @@ JSON Feed spec: https://jsonfeed.org/version/1 """ +from __future__ import unicode_literals +from builtins import str +from past.builtins import basestring + import mimetypes import mf2util @@ -139,7 +143,7 @@ def attachment(jf): 'title': item.get('title'), 'summary': item.get('summary'), 'content': item.get('content_html') or item.get('content_text'), - 'id': unicode(item.get('id') or ''), + 'id': str(item.get('id') or ''), 'published': item.get('date_published'), 'updated': item.get('date_modified'), 'url': item.get('url'), diff --git a/granary/microformats2.py b/granary/microformats2.py index aba03dc7..5a19485d 100644 --- a/granary/microformats2.py +++ b/granary/microformats2.py @@ -2,11 +2,17 @@ Microformats2 specs: http://microformats.org/wiki/microformats2 """ -from collections import defaultdict +from __future__ import absolute_import, unicode_literals +from future import standard_library +standard_library.install_aliases() +from builtins import str +from past.builtins import basestring + +from collections import defaultdict, OrderedDict import copy import itertools import logging -import urlparse +import urllib.parse import string import re import xml.sax.saxutils @@ -21,7 +27,8 @@ get_urls, uniquify, ) -import source + +from . import source HENTRY = string.Template("""\
@@ -366,7 +373,7 @@ def fetch(url): def absolute_urls(prop): return [url for url in get_string_urls(props.get(prop, [])) # filter out relative and invalid URLs (mf2py gives absolute urls) - if urlparse.urlparse(url).netloc] + if urllib.parse.urlparse(url).netloc] urls = props.get('url') and get_string_urls(props.get('url')) @@ -738,11 +745,11 @@ def render_content(obj, include_location=True, synthesize_content=True, mentions.sort(key=lambda t: t['startIndex']) last_end = 0 orig = util.WideUnicode(content) - content = util.WideUnicode(u'') + content = util.WideUnicode('') for tag in mentions: start = tag['startIndex'] end = start + tag['length'] - content = util.WideUnicode(u'%s%s<a href="%s">%s</a>' % ( + content = util.WideUnicode('%s%s<a href="%s">%s</a>' % ( content, orig[last_end:start], tag['url'], orig[start:end])) last_end = end @@ -795,7 +802,7 @@ def render_content(obj, include_location=True, synthesize_content=True, target.get('url', '#'), author.get('username')) else: # image looks bad in the simplified rendering - author = {k: v for k, v in author.iteritems() if k != 'image'} + author = {k: v for k, v in author.items() if k != 'image'} content += '%s <a href="%s">%s</a> by %s' % ( verb, target.get('url', '#'), target.get('displayName', target.get('title', 'a post')), @@ -918,13 +925,13 @@ def tags_to_html(tags, classname): tags: decoded JSON ActivityStreams objects. classname: class for span to enclose tags in """ - urls = set() # stores (url, displayName) tuples + urls = OrderedDict() # stores (url, displayName) tuples for tag in tags: name = tag.get('displayName') or '' - urls.update((url, name) for url in object_urls(tag)) + urls.update({(url, name): None for url in object_urls(tag)}) return ''.join('\n<a class="%s" href="%s">%s</a>' % (classname, url, name) - for url, name in urls) + for url, name in urls.keys()) def object_urls(obj): diff --git a/granary/source.py b/granary/source.py index e1eceeb5..e2556e8b 100644 --- a/granary/source.py +++ b/granary/source.py @@ -9,21 +9,26 @@ or unset if unknown. http://activitystrea.ms/specs/json/targeting/1.0/#anchor3 """ +from __future__ import absolute_import, unicode_literals +from future import standard_library +from future.utils import with_metaclass +standard_library.install_aliases() +from builtins import object, str + import collections import copy import json import logging import mimetypes import re -import urllib2 -import urlparse +import urllib.error, urllib.parse, urllib.request import html2text from bs4 import BeautifulSoup +from oauth_dropins.webutil import util import requests -import appengine_config -from oauth_dropins.webutil import util +from . import appengine_config ME = '@me' SELF = '@self' @@ -101,7 +106,7 @@ def load_json(body, url): except (ValueError, TypeError): msg = 'Non-JSON response! Returning synthetic HTTP 502.\n%s' % body logging.error(msg) - raise urllib2.HTTPError(url, 502, msg, {}, None) + raise urllib.error.HTTPError(url, 502, msg, {}, None) def creation_result(content=None, description=None, abort=False, @@ -157,7 +162,7 @@ def __new__(meta, name, bases, class_dict): return cls -class Source(object): +class Source(with_metaclass(SourceMeta, object)): """Abstract base class for a source (e.g. Facebook, Twitter). 
Concrete subclasses must override the class constants below and implement @@ -177,7 +182,6 @@ class Source(object): * HTML2TEXT_OPTIONS: dict mapping string html2text option names to values https://github.com/Alir3z4/html2text/blob/master/docs/usage.md#available-options """ - __metaclass__ = SourceMeta POST_ID_RE = None HTML2TEXT_OPTIONS = {} @@ -632,19 +636,20 @@ def original_post_discovery(activity, domains=None, cache=None, candidates += [match.expand(r'http://\1/\2') for match in Source._PERMASHORTCITATION_RE.finditer(content)] - candidates = set(filter(None, - (util.clean_url(url) for url in candidates - # heuristic: ellipsized URLs are probably incomplete, so omit them. - if url and not url.endswith('...') and not url.endswith(u'…')))) + candidates = set( + util.clean_url(url) for url in candidates + # heuristic: ellipsized URLs are probably incomplete, so omit them. + if url and not url.endswith('...') and not url.endswith('…')) # check for redirect and add their final urls redirects = {} # maps final URL to original URL for redirects - for url in list(candidates): + for url in candidates: resolved = util.follow_redirects(url, cache=cache, **kwargs) if (resolved.url != url and resolved.headers.get('content-type', '').startswith('text/html')): redirects[resolved.url] = url - candidates.add(resolved.url) + + candidates.update(redirects.keys()) # use domains to determine which URLs are original post links vs mentions originals = set() @@ -868,7 +873,7 @@ def base_id(cls, url): Returns: string, or None """ - return urlparse.urlparse(url).path.rstrip('/').rsplit('/', 1)[-1] or None + return urllib.parse.urlparse(url).path.rstrip('/').rsplit('/', 1)[-1] or None @classmethod def post_id(cls, url): @@ -918,14 +923,14 @@ def _content_for_create(self, obj, ignore_formatting=False, prefer_name=False, video = soup.video or soup.find(class_='u-video') if video: video.extract() - content = unicode(soup) + content = str(soup) if strip_quotations: quotations = soup.find_all(class_='u-quotation-of') if quotations: for q in quotations: q.extract() - content = unicode(soup) + content = str(soup) # compare to content with HTML tags stripped if summary == soup.get_text('').strip(): @@ -947,4 +952,4 @@ def _content_for_create(self, obj, ignore_formatting=False, prefer_name=False, return summary or ( (name or content) if prefer_name else (content or name) - ) or u'' + ) or '' diff --git a/granary/templates/_entry.atom b/granary/templates/_entry.atom index 1e9793c5..326715f2 100644 --- a/granary/templates/_entry.atom +++ b/granary/templates/_entry.atom @@ -66,7 +66,7 @@ {% endfor %} {% set location = obj.location %} {% if location.latitude and location.longitude %} - {{ location.latitude }} {{location.longitude }} + {{ location.latitude|round(9) }} {{ location.longitude|round(9) }} {% endif %} {% if location.displayName %} {{ location.displayName }} diff --git a/granary/test/test_as2.py b/granary/test/test_as2.py index d0590237..1ef3b417 100644 --- a/granary/test/test_as2.py +++ b/granary/test/test_as2.py @@ -4,12 +4,14 @@ Most of the tests are in testdata/. This is just a few things that are too small for full testdata tests. 
""" +from __future__ import unicode_literals + from oauth_dropins.webutil import testutil from granary import as2 -class ActivityStreams2Test(testutil.HandlerTest): +class ActivityStreams2Test(testutil.TestCase): def test_from_as1_blank(self): self.assertEqual({}, as2.from_as1(None)) diff --git a/granary/test/test_atom.py b/granary/test/test_atom.py index 4851a64e..5b8b0532 100644 --- a/granary/test/test_atom.py +++ b/granary/test/test_atom.py @@ -1,9 +1,10 @@ # coding=utf-8 """Unit tests for atom.py.""" +from __future__ import unicode_literals import copy -import mox +from mox3 import mox from oauth_dropins.webutil import testutil import requests @@ -104,7 +105,7 @@ } -class AtomTest(testutil.HandlerTest): +class AtomTest(testutil.TestCase): def test_activities_to_atom(self): for test_module in test_facebook, test_instagram, test_twitter: @@ -182,7 +183,7 @@ def test_atom_to_activity_reply(self): 'object': { 'id': 'reply-url', 'url': 'reply-url', - 'content': u'I hereby ☕ reply.', + 'content': 'I hereby ☕ reply.', 'inReplyTo': [{'id': 'foo-id', 'url': 'foo-url'}], }, } @@ -216,7 +217,7 @@ def test_atom_to_activity_unicode_title(self): self.assert_equals({ 'objectType': 'activity', 'object': { - 'title': u'How quill’s editor looks', + 'title': 'How quill’s editor looks', }, }, atom.atom_to_activity(u"""\ @@ -628,7 +629,7 @@ def test_html_to_atom_fetch_author(self): https://my.site/author Tantek Çelik -""".encode('utf-8'), got.encode('utf-8'), ignore_blanks=True) +""", got, ignore_blanks=True) def test_media_tags_and_enclosures(self): got = atom.activities_to_atom([{ @@ -752,7 +753,7 @@ def test_image_duplicated_in_attachment(self): } got = atom.activities_to_atom([activity], {}) - self.assertEquals(1, got.count(''), got) + self.assertEqual(1, got.count(''), got) self.assert_multiline_in(""" diff --git a/granary/test/test_facebook.py b/granary/test/test_facebook.py index 474a72a6..56a60c44 100644 --- a/granary/test/test_facebook.py +++ b/granary/test/test_facebook.py @@ -1,12 +1,16 @@ # coding=utf-8 """Unit tests for facebook.py. 
""" +from __future__ import unicode_literals +from future import standard_library +standard_library.install_aliases() +from builtins import range, zip + import copy import json -import urllib -import urllib2 +import urllib.error, urllib.parse, urllib.request -import mox +from mox3 import mox from oauth_dropins.webutil import testutil from oauth_dropins.webutil import util @@ -215,7 +219,7 @@ def tag_uri(name): 'state': 'CA', 'country': 'United States', 'latitude': 37.728193717481, - 'longitude': -122.49336423595 + 'longitude': -122.493364235852, } }, 'type': 'photo', @@ -448,7 +452,7 @@ def tag_uri(name): 'url': 'https://www.facebook.com/212038/posts/10100176064482163#haha-by-100005', 'objectType': 'activity', 'verb': 'react', - 'content': u'😆', + 'content': '😆', 'object': {'url': 'https://www.facebook.com/212038/posts/10100176064482163'}, 'author': { 'objectType': 'person', @@ -463,7 +467,7 @@ def tag_uri(name): 'url': 'https://www.facebook.com/212038/posts/10100176064482163#sad-by-100006', 'objectType': 'activity', 'verb': 'react', - 'content': u'😢', + 'content': '😢', 'object': {'url': 'https://www.facebook.com/212038/posts/10100176064482163'}, 'author': { 'objectType': 'person', @@ -532,7 +536,7 @@ def tag_uri(name): 'id': tag_uri('113785468632283'), 'url': 'https://www.facebook.com/113785468632283', 'latitude': 37.728193717481, - 'longitude': -122.49336423595, + 'longitude': -122.493364235852, 'position': '+37.728194-122.493364/', }, 'tags': [{ @@ -584,7 +588,7 @@ def tag_uri(name): 'objectType': 'note', 'url': 'https://www.facebook.com/212038/posts/222', 'content': 'Stopped in to grab coffee and saw this table topper. Wow. Just...wow.', - 'image': {'url': u'https://fbcdn-photos-b-a.akamaihd.net/pic_o.jpg'}, + 'image': {'url': 'https://fbcdn-photos-b-a.akamaihd.net/pic_o.jpg'}, 'published': '2014-04-09T20:44:26+00:00', 'author': POST_OBJ['author'], 'to': [{'alias': '@public', 'objectType': 'group'}], @@ -636,7 +640,7 @@ def tag_uri(name): 'url': 'https://www.facebook.com/212038/posts/222#wow-by-777', 'objectType': 'activity', 'verb': 'react', - 'content': u'😮', + 'content': '😮', 'object': {'url': 'https://www.facebook.com/212038/posts/222'}, 'author': { 'objectType': 'person', @@ -906,7 +910,7 @@ def tag_uri(name): 'count': 2, 'cover_photo': '1520050698319836', 'from': { - 'name': u'Snoøpy Barrett', + 'name': 'Snoøpy Barrett', 'id': '1407574399567467' }, 'link': 'https://www.facebook.com/album.php?fbid=1520022318322674&id=1407574399567467&aid=1073741827', @@ -925,7 +929,7 @@ def tag_uri(name): 'objectType': 'person', 'id': tag_uri('1407574399567467'), 'numeric_id': '1407574399567467', - 'displayName': u'Snoøpy Barrett', + 'displayName': 'Snoøpy Barrett', 'image': {'url': 'https://graph.facebook.com/v2.10/1407574399567467/picture?type=large'}, 'url': 'https://www.facebook.com/1407574399567467', }, @@ -1029,7 +1033,7 @@ def tag_uri(name): 2012-03-04T18:20:37+00:00 2012-03-04T19:08:16+00:00 - 37.7281937175 -122.493364236 + 37.728193717 -122.493364236 Lake Merced @@ -1043,7 +1047,7 @@ def tag_uri(name): """ -class FacebookTest(testutil.HandlerTest): +class FacebookTest(testutil.TestCase): def setUp(self): super(FacebookTest, self).setUp() @@ -1303,13 +1307,13 @@ def test_get_activities_activity_id_with_underscore(self): self.expect_urlopen(API_OBJECT % ('12', '34'), {'id': '123'}) self.mox.ReplayAll() obj = self.fb.get_activities(activity_id='12_34')[0]['object'] - self.assertEquals('123', obj['fb_id']) + self.assertEqual('123', obj['fb_id']) def 
test_get_activities_activity_id_with_user_id(self): self.expect_urlopen(API_OBJECT % ('12', '34'), {'id': '123'}) self.mox.ReplayAll() obj = self.fb.get_activities(activity_id='34', user_id='12')[0]['object'] - self.assertEquals('123', obj['fb_id']) + self.assertEqual('123', obj['fb_id']) def test_get_activities_activity_id_no_underscore_or_user_id(self): with self.assertRaises(ValueError): @@ -1323,7 +1327,7 @@ def test_get_activities_response_not_json(self): try: self.fb.get_activities() assert False, 'expected HTTPError' - except urllib2.HTTPError, e: + except urllib.error.HTTPError as e: self.assertEqual(502, e.code) self.assertEqual('Non-JSON response! Returning synthetic HTTP 502.\nnot json', e.reason) @@ -1589,7 +1593,7 @@ def test_get_comment_400s_id_without_underscore(self): '123?fields=id,message,from,created_time,message_tags,parent,attachment', {}, status=400) self.mox.ReplayAll() - self.assertRaises(urllib2.HTTPError, self.fb.get_comment, '123') + self.assertRaises(urllib.error.HTTPError, self.fb.get_comment, '123') def test_get_comment_with_activity(self): # still makes the API call, since the comment might be paged out or nested @@ -1630,7 +1634,7 @@ def test_get_share_obj_400s(self): def test_get_share_500s(self): self.expect_urlopen(API_SHARES % '1_2', {}, status=500) self.mox.ReplayAll() - self.assertRaises(urllib2.HTTPError, self.fb.get_share, '1', '2', '_') + self.assertRaises(urllib.error.HTTPError, self.fb.get_share, '1', '2', '_') def test_get_share_with_activity(self): self.expect_urlopen(API_SHARES % '1_2', {'1_2': {'data': [{'id': SHARE['id']}]}}) @@ -1792,7 +1796,7 @@ def test_post_to_object_with_comment_unknown_id_format(self): def test_post_to_object_message_tags_list(self): post = copy.copy(POST) - tags = post['message_tags'].values() + tags = list(post['message_tags'].values()) post['message_tags'] = tags[0] + tags[1] # both lists self.assert_equals(POST_OBJ, self.fb.post_to_object(post)) @@ -1941,8 +1945,8 @@ def test_user_to_actor_multiple_urls(self): http://b http://c""", 'link': 'http://x', # website overrides link }) - self.assertEquals('http://a', actor['url']) - self.assertEquals( + self.assertEqual('http://a', actor['url']) + self.assertEqual( [{'value': 'http://a'}, {'value': 'http://b'}, {'value': 'http://c'}], actor['urls']) @@ -1950,8 +1954,8 @@ def test_user_to_actor_multiple_urls(self): 'id': '123', 'link': 'http://b http://c http://a', }) - self.assertEquals('http://b', actor['url']) - self.assertEquals( + self.assertEqual('http://b', actor['url']) + self.assertEqual( [{'value': 'http://b'}, {'value': 'http://c'}, {'value': 'http://a'}], actor['urls']) @@ -2234,7 +2238,7 @@ def test_album_to_object_full(self): self.assert_equals(ALBUM_OBJ, self.fb.album_to_object(ALBUM)) def test_create_post(self): - self.expect_urlopen(API_PUBLISH_POST, {'id': '123_456'}, data=urllib.urlencode({ + self.expect_urlopen(API_PUBLISH_POST, {'id': '123_456'}, data=urllib.parse.urlencode({ 'message': 'my msg', 'tags': '234,345,456', })) @@ -2253,11 +2257,11 @@ def test_create_post(self): }, self.fb.create(obj).content) preview = self.fb.preview_create(obj) - self.assertEquals('post:', preview.description) - self.assertEquals('my msg

with Friend 1, Friend 2, Friend 3', preview.content) + self.assertEqual('post:', preview.description) + self.assertEqual('my msg

with Friend 1, Friend 2, Friend 3', preview.content) def test_create_post_include_link(self): - self.expect_urlopen(API_PUBLISH_POST, {}, data=urllib.urlencode({ + self.expect_urlopen(API_PUBLISH_POST, {}, data=urllib.parse.urlencode({ 'message': 'my content\n\n(Originally published at: http://obj.co)', })) self.mox.ReplayAll() @@ -2274,12 +2278,12 @@ def test_create_post_include_link(self): }) self.fb.create(obj, include_link=source.INCLUDE_LINK) preview = self.fb.preview_create(obj, include_link=source.INCLUDE_LINK) - self.assertEquals( + self.assertEqual( 'my content\n\n(Originally published at: http://obj.co)', preview.content) def test_create_post_with_title(self): - self.expect_urlopen(API_PUBLISH_POST, {}, data=urllib.urlencode({ + self.expect_urlopen(API_PUBLISH_POST, {}, data=urllib.parse.urlencode({ 'message': 'my title\n\nmy content\n\n(Originally published at: http://obj.co)', })) self.mox.ReplayAll() @@ -2296,12 +2300,12 @@ def test_create_post_with_title(self): }) self.fb.create(obj, include_link=source.INCLUDE_LINK) preview = self.fb.preview_create(obj, include_link=source.INCLUDE_LINK) - self.assertEquals( + self.assertEqual( 'my title\n\nmy content\n\n(Originally published at: http://obj.co)', preview.content) def test_create_post_with_no_title(self): - self.expect_urlopen(API_PUBLISH_POST, {}, data=urllib.urlencode({ + self.expect_urlopen(API_PUBLISH_POST, {}, data=urllib.parse.urlencode({ 'message': 'my\ncontent\n\n(Originally published at: http://obj.co)', })) self.mox.ReplayAll() @@ -2318,7 +2322,7 @@ def test_create_post_with_no_title(self): }) self.fb.create(obj, include_link=source.INCLUDE_LINK) preview = self.fb.preview_create(obj, include_link=source.INCLUDE_LINK) - self.assertEquals( + self.assertEqual( 'my\ncontent\n\n(Originally published at: http://obj.co)', preview.content) @@ -2337,7 +2341,7 @@ def test_create_comment(self): }, self.fb.create(obj).content) preview = self.fb.preview_create(obj) - self.assertEquals('my cmt', preview.content) + self.assertEqual('my cmt', preview.content) self.assertIn('comment on this post:', preview.description) self.assertIn('
', preview.description) @@ -2381,7 +2385,7 @@ def test_create_comment_on_post_urls(self): def test_create_comment_with_photo(self): self.expect_urlopen( '547822715231468/comments', {'id': '456_789'}, - data=urllib.urlencode({'message': 'cc Sam G, Michael M', + data=urllib.parse.urlencode({'message': 'cc Sam G, Michael M', 'attachment_url': 'http://pict/ure'})) self.mox.ReplayAll() @@ -2397,7 +2401,7 @@ def test_create_comment_with_photo(self): }, self.fb.create(obj).content) preview = self.fb.preview_create(obj) - self.assertEquals('cc Sam G, Michael M

', + self.assertEqual('cc Sam G, Michael M

', preview.content) self.assertIn('comment on this post:', preview.description) self.assertIn('
', preview.description) @@ -2496,7 +2500,7 @@ def test_create_rsvp(self): '%s\n%s' % (created.content, rsvp)) preview = self.fb.preview_create(rsvp) - self.assertEquals('RSVP maybe to ' + self.assertEqual('RSVP maybe to ' 'this event.', preview.description) @@ -2604,19 +2608,20 @@ def test_create_with_photo(self): # test preview preview = self.fb.preview_create(obj) - self.assertEquals('post:', preview.description) - self.assertEquals(u'my caption

', + self.assertEqual('post:', preview.description) + self.assertEqual('my caption

', preview.content) # test create self.expect_urlopen(API_ALBUMS % 'me', {'data': []}) self.expect_urlopen(API_PUBLISH_PHOTO, {'id': '123_456'}, - data=u'url=http%3A%2F%2Fmy%2Fpictur%C3%A9&message=my+caption') + data='message=my+caption&url=http%3A%2F%2Fmy%2Fpictur%C3%A9') self.mox.ReplayAll() self.assert_equals({ 'id': '123_456', 'url': 'https://www.facebook.com/123/posts/456', - 'type': 'post'}, self.fb.create(obj).content) + 'type': 'post', + }, self.fb.create(obj).content) def test_create_with_photo_uses_timeline_photos_album(self): """https://github.com/snarfed/bridgy/issues/571""" @@ -2629,8 +2634,10 @@ def test_create_with_photo_uses_timeline_photos_album(self): {'id': '1', 'name': 'foo bar'}, {'id': '2', 'type': 'wall'}, ]}) - self.expect_urlopen('2/photos', {}, data=urllib.urlencode({ - 'url': 'http://my/picture', 'message': ''})) + self.expect_urlopen('2/photos', {}, data=urllib.parse.urlencode(( + ('message', ''), + ('url', 'http://my/picture'), + ))) self.mox.ReplayAll() self.assert_equals({'type': 'post', 'url': None}, self.fb.create(obj).content) @@ -2650,7 +2657,7 @@ def test_create_with_photo_and_person_tags(self): # test preview preview = self.fb.preview_create(obj) - self.assertEquals( + self.assertEqual( '



with ' 'Foo, ' 'User 345', @@ -2659,11 +2666,11 @@ def test_create_with_photo_and_person_tags(self): # test create self.expect_urlopen(API_ALBUMS % 'me', {'data': []}) self.expect_urlopen( - API_PUBLISH_PHOTO, {'id': '123_456'}, data=urllib.urlencode({ - 'url': 'http://my/picture', - 'message': '', - 'tags': json.dumps([{'tag_uid': '234'}, {'tag_uid': '345'}]), - })) + API_PUBLISH_PHOTO, {'id': '123_456'}, data=urllib.parse.urlencode(( + ('message', ''), + ('url', 'http://my/picture'), + ('tags', json.dumps([{'tag_uid': '234'}, {'tag_uid': '345'}])), + ))) self.mox.ReplayAll() self.assert_equals({ 'id': '123_456', @@ -2681,13 +2688,13 @@ def test_create_with_video(self): # test preview preview = self.fb.preview_create(obj) - self.assertEquals('post:', preview.description) - self.assertEquals('my\ncaption