Skip to content

Commit

Permalink
Merge pull request #29 from Debakel/dev/migrate-to-python3
Browse files Browse the repository at this point in the history
Update to Python 3 and diaspy 0.0.5
  • Loading branch information
Debakel authored Sep 25, 2019
2 parents f4d44f2 + 89eef57 commit 627b8ba
Show file tree
Hide file tree
Showing 10 changed files with 128 additions and 56 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
tests.py
*.pyc
*.db
log/
.idea/
env/
24 changes: 22 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
# feedDiasp*
Feed Diaspora with RSS-Feeds or Facebook.
## Installation

## Requirements

`feedDiasp` requires Python 3. Installing `pandoc` is optional but recommended for converting HTML into Markdown:
* On Debian based distributions : `apt-get install pandoc`
* On ArchLinux : `pacman -S pandoc`
* On MacOS: `brew install pandoc`

## Installation
`$ pip install feeddiasp`

## Usage
## Usage example

from feeddiasp import FeedDiasp, FBParser, RSSParser

Expand All @@ -20,9 +27,22 @@ Feed Diaspora with RSS-Feeds or Facebook.

To avoid duplicates, submitted posts will be stored in `posts.txt` (defined in `db`).

## Running the tests

```
$ python -m unittest tests
```

To run the tests, login credentials for a diaspora account must be stored in the following environment variables:
* `FEEDDIASP_TEST_POD`
* `FEEDDIASP_TEST_USERNAME`
* `FEEDDIASP_TEST_PASSWORD`


## Contributors
* [Moritz Duchêne](https://github.com/Debakel)
* [Alexey Veretennikov](https://github.com/fourier)
* [Céline Libéral](https://github.com/celisoft)

## License

Expand Down
32 changes: 26 additions & 6 deletions feeddiasp/Diasp.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,49 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#!/usr/bin/env python3
import diaspy


class Diasp:
def __init__(self, pod=None, username=None, password=None, provider_name=''):
def __init__(self, pod=None, username=None, password=None, provider_name='feedDiasp*'):
"""
:param pod: Diaspora* pod
:param username: login / username
:param password: uhm ... the password
:param provider_name: usually feedDiasp*
"""
self.connection = None
self.stream = None

self.pod = pod
self.username = username
self.password = password
self.logged_in = False
self.provider_name = provider_name

def login(self):
print 'Login as ' + self.username + ' to ' + self.pod
"""Initialize the connection to the Diaspora* pod """
print('Login as ' + self.username + ' to ' + self.pod)
try:
self.connection = diaspy.connection.Connection(pod=self.pod, username=self.username, password=self.password)
if self.connection is None:
print('Cannot connect to ' + self.pod)
return False
self.connection.login()
self.stream = diaspy.streams.Stream(self.connection, fetch=False)
self.stream = diaspy.streams.Stream(self.connection)
self.logged_in = True
return True
except Exception as e:
print 'Failed to login: ' + str(e)
print('Failed to login: ' + str(e))
raise LoginException(str(e))

def post(self, text, title=None, hashtags=None, source=None, append=None):
"""
Post given message to Diaspora*
:param title: the post title (default is None)
:param text: the message to post
:param hashtags: hashtags list (default is None)
:param source: the source of information at the origin of the post (default is None)
:param append: text to be added at the end (default is None)
"""
if not self.logged_in:
self.login()
if title is not None and len(title) > 0:
Expand Down
13 changes: 7 additions & 6 deletions feeddiasp/FBParser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#!/usr/bin/env python3

import html.parser

import facepy
import HTMLParser


class FBParser:
Expand All @@ -17,7 +18,7 @@ def update(self):
def get_entries(self):
statuses = self.graph.get(self.user + '/posts')['data']
entries = []
htmlparser = HTMLParser.HTMLParser()

for status in statuses:
# skip, if post on wall
if status['from']['id'] != self.user_id:
Expand All @@ -34,7 +35,7 @@ def get_entries(self):
# format Photo
content = self.format_photo(self.graph.get(status['object_id']))
if message:
content += htmlparser.unescape(message)
content += html.parser.feed(message)
elif status['type'] == 'event':
# format Event
content = self.format_event(self.graph.get(status['object_id']))
Expand All @@ -47,7 +48,7 @@ def get_entries(self):
content += description
else:
# format Post
content = htmlparser.unescape(status['message'])
content = html.parser.feed(status['message'])

post['id'] = post_id
post['title'] = ''
Expand Down
21 changes: 6 additions & 15 deletions feeddiasp/FeedDiasp.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,18 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#!/usr/bin/env python3

import time
import sys
from Diasp import Diasp
from PostDBCSV import PostDBCSV
from RSSParser import RSSParser
from FBParser import FBParser
from .Diasp import Diasp
from .PostDBCSV import PostDBCSV


def isstring(s):
    """
    Return True if *s* is a text (``str``) or binary (``bytes``) string.

    :param s: object to check
    :return: ``True`` for ``str`` and ``bytes`` instances, ``False`` otherwise
    """
    # Python 3 has no `basestring`, so the NameError fallback that the
    # Python 2 check needed is dead code and has been dropped.
    return isinstance(s, (str, bytes))


class FeedDiasp:
def __init__(self, pod, username, password, db, parser, keywords=None, hashtags=None, append=None):
# UnicodeEncodeError Workaround
reload(sys);
sys.setdefaultencoding("utf8")

# Feed
self.feed = parser

Expand Down Expand Up @@ -58,7 +49,7 @@ def publish(self):
self.diasp.login()
for post in posts:
if not self.db.is_published(post['id']):
print 'Published: ' + post['title'].encode('utf8')
print('Published: ' + post['title'])
hashtags = self.find_hashtags(post['content'], self.keywords)
if self.hashtags is not None:
hashtags.extend(self.hashtags)
Expand All @@ -71,7 +62,7 @@ def publish(self):
append=self.append)
self.db.mark_as_posted(post['id'])
except Exception as e:
print 'Failed to publish: ' + str(e)
print('Failed to publish: ' + str(e))
return True

def format_tag(self, tag):
Expand Down
5 changes: 2 additions & 3 deletions feeddiasp/PostDBCSV.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#!/usr/bin/env python3

from datetime import datetime
import os
from datetime import datetime


class PostDBCSV:
Expand Down
32 changes: 19 additions & 13 deletions feeddiasp/RSSParser.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#!/usr/bin/env python3

import feedparser
from html2text import html2text
import os
import pypandoc
from html2text import html2text


class RSSParser:
def __init__(self, url):
self.url = url
self.feed = None

def update(self):
self.feed = feedparser.parse(self.url)
Expand All @@ -25,11 +27,11 @@ def get_entries(self):
new_post['title'] = entry.title if 'title' in entry else ''
new_post['link'] = entry.link if 'link' in entry else ''
if 'content' in entry:
new_post['content'] = html2markup(entry.content[0].value) # html2markup() converts HTML to Markup
new_post['content'] = html2markdown(entry.content[0].value)
elif 'summary' in entry:
new_post['content'] = html2markup(entry.summary)
new_post['content'] = html2markdown(entry.summary)
elif 'description' in entry:
new_post['content'] = html2markup(entry.description)
new_post['content'] = html2markdown(entry.description)
else:
new_post['content'] = ''
# tags
Expand All @@ -45,11 +47,15 @@ def get_entries(self):
return entries


def html2markdown(html: str):
    """
    Return the given HTML as equivalent Markdown-structured text.

    The conversion is attempted with pandoc (through ``pypandoc``) first.
    If that raises ``OSError`` (pandoc is not installed), a hint is printed
    and ``html2text`` is used as a fallback.

    :param html: HTML markup to convert
    :return: Markdown-structured text
    """
    try:
        return pypandoc.convert_text(html, 'md', format='html')
    except OSError:
        # Pandoc not installed. Switching to html2text instead.
        # Each fragment ends with a space so the joined sentence reads correctly.
        msg = "It's recommended to install the `pandoc` library for converting " \
              "HTML into Markdown-structured text. It tends to have better results " \
              "than `html2text`, which is now used as a fallback."
        print(msg)
        return html2text(html)


# Backward-compatible alias for the function's former name.
html2markup = html2markdown
9 changes: 5 additions & 4 deletions feeddiasp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from FBParser import FBParser
from RSSParser import RSSParser
from FeedDiasp import FeedDiasp
from Diasp import Diasp
from .Diasp import Diasp
from .FBParser import FBParser
from .FeedDiasp import FeedDiasp
from .PostDBCSV import PostDBCSV
from .RSSParser import RSSParser
13 changes: 7 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
argparse==1.4.0
#argparse==1.2.1
#diaspy==0.3.0
diaspy-api==0.5.1
facepy==1.0.9
facepy==1.0.8
feedparser==5.2.1
html2text==2016.9.19
pypandoc==1.4
requests==2.18.3
six==1.10.0
wsgiref==0.1.2
pypandoc==1.3.3
#requests==2.9.1
#six==1.10.0
#wsgiref==0.1.2
33 changes: 33 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os
from unittest import TestCase

from feeddiasp import Diasp, RSSParser

RSS_FEED_URL = "http://www.spiegel.de/schlagzeilen/tops/index.rss"


class DiaspClientTestCase(TestCase):
    """Integration test for the Diaspora* client; requires a real account."""

    def setUp(self):
        """
        Read the test account's credentials from the environment.

        The test is skipped (instead of erroring with ``KeyError``) when any
        of the ``FEEDDIASP_TEST_*`` variables is not set.
        """
        try:
            self.pod = os.environ['FEEDDIASP_TEST_POD']
            self.username = os.environ['FEEDDIASP_TEST_USERNAME']
            self.password = os.environ['FEEDDIASP_TEST_PASSWORD']
        except KeyError as missing:
            self.skipTest('environment variable {} is not set'.format(missing))

    def test_login(self):
        """Logging in with the configured credentials must not raise."""
        client = Diasp(
            pod=self.pod,
            username=self.username,
            password=self.password)

        try:
            client.login()
        except Exception as e:
            # Report a login problem as a test failure rather than an error.
            self.fail(e)


class RSSParserTestCase(TestCase):
    """Integration test for the RSS parser; fetches ``RSS_FEED_URL``."""

    def test_get_posts(self):
        """Parsing the feed must yield at least one entry."""
        rss = RSSParser(url=RSS_FEED_URL)
        rss.update()
        posts = rss.get_entries()

        # assertGreater reports the actual length when the check fails.
        self.assertGreater(len(posts), 0)

0 comments on commit 627b8ba

Please sign in to comment.