Merge pull request #29 from Debakel/dev/migrate-to-python3

Update to Python 3 and diaspy 0.0.5
Debakel · Sep 25, 2019 · 627b8ba · 627b8ba
2 parents f4d44f2 + 89eef57
commit 627b8ba
Show file tree

Hide file tree

Showing 10 changed files with 128 additions and 56 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,5 @@
-tests.py
 *.pyc
 *.db
 log/
 .idea/
+env/
diff --git a/README.md b/README.md
@@ -1,10 +1,17 @@
 # feedDiasp*
 Feed Diaspora with RSS-Feeds or Facebook.
-## Installation
 
+## Requirements
+
+`feedDiasp` requires Python 3. Installing `pandoc` is optional but recommended; it is used for converting HTML into Markdown:
+ * On Debian-based distributions: `apt-get install pandoc`
+ * On Arch Linux: `pacman -S pandoc`
+ * On macOS: `brew install pandoc`
+
+## Installation
 `$ pip install feeddiasp`  
 
-## Usage
+## Usage example
 
     from feeddiasp import FeedDiasp, FBParser, RSSParser
 
@@ -20,9 +27,22 @@ Feed Diaspora with RSS-Feeds or Facebook.
 
 To avoid duplicates, submitted posts will be stored in `posts.txt` (defined in `db`).
 
+## Running the tests
+
+```
+$ python -m unittest tests
+```
+
+To run the tests, login credentials for a diaspora account must be stored in the following environment variables:
+* `FEEDDIASP_TEST_POD`
+* `FEEDDIASP_TEST_USERNAME`
+* `FEEDDIASP_TEST_PASSWORD`
+
+
 ## Contributors
 * ![Moritz Duchêne](https://github.com/Debakel)
 * ![Alexey Veretennikov](https://github.com/fourier)
+* ![Céline Libéral](https://github.com/celisoft)
 
 ## License
 

diff --git a/feeddiasp/Diasp.py b/feeddiasp/Diasp.py
@@ -1,29 +1,49 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 import diaspy
 
 
 class Diasp:
-    def __init__(self, pod=None, username=None, password=None, provider_name=''):
+    def __init__(self, pod=None, username=None, password=None, provider_name='feedDiasp*'):
+        """
+        :param pod: Diaspora* pod
+        :param username: login / username
+        :param password: uhm ... the password
+        :param provider_name: usually feedDiasp*
+        """
+        self.connection = None
+        self.stream = None
+
         self.pod = pod
         self.username = username
         self.password = password
         self.logged_in = False
         self.provider_name = provider_name
 
     def login(self):
-        print 'Login as ' + self.username + ' to ' + self.pod
+        """Initialize the connection to the Diaspora* pod """
+        print('Login as ' + self.username + ' to ' + self.pod)
         try:
             self.connection = diaspy.connection.Connection(pod=self.pod, username=self.username, password=self.password)
+            if self.connection is None:
+                print('Cannot connect to ' + self.pod)
+                return False
             self.connection.login()
-            self.stream = diaspy.streams.Stream(self.connection, fetch=False)
+            self.stream = diaspy.streams.Stream(self.connection)
             self.logged_in = True
             return True
         except Exception as e:
-            print 'Failed to login: ' + str(e)
+            print('Failed to login: ' + str(e))
             raise LoginException(str(e))
 
     def post(self, text, title=None, hashtags=None, source=None, append=None):
+        """
+        Post given message to Diaspora*
+        :param title: the post title (default is None)
+        :param text: the message to post
+        :param hashtags: hashtags list (default is None)
+        :param source: the source of information at the origin of the post (default is None)
+        :param append: text to be added at the end (default is None)
+        """
         if not self.logged_in:
             self.login()
         if title is not None and len(title) > 0:

diff --git a/feeddiasp/FBParser.py b/feeddiasp/FBParser.py
@@ -1,7 +1,8 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
+
+import html.parser
+
 import facepy
-import HTMLParser
 
 
 class FBParser:
@@ -17,7 +18,7 @@ def update(self):
     def get_entries(self):
         statuses = self.graph.get(self.user + '/posts')['data']
         entries = []
-        htmlparser = HTMLParser.HTMLParser()
+
         for status in statuses:
             # skip, if post on wall
             if status['from']['id'] != self.user_id:
@@ -34,7 +35,7 @@ def get_entries(self):
                 # format Photo
                 content = self.format_photo(self.graph.get(status['object_id']))
                 if message:
-                    content += htmlparser.unescape(message)
+                    content += html.unescape(message)
             elif status['type'] == 'event':
                 # format Event
                 content = self.format_event(self.graph.get(status['object_id']))
@@ -47,7 +48,7 @@ def get_entries(self):
                     content += description
             else:
                 # format Post
-                content = htmlparser.unescape(status['message'])
+                content = html.unescape(status['message'])
 
             post['id'] = post_id
             post['title'] = ''

diff --git a/feeddiasp/FeedDiasp.py b/feeddiasp/FeedDiasp.py
@@ -1,27 +1,18 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 
-import time
-import sys
-from Diasp import Diasp
-from PostDBCSV import PostDBCSV
-from RSSParser import RSSParser
-from FBParser import FBParser
+from .Diasp import Diasp
+from .PostDBCSV import PostDBCSV
 
 
 def isstring(s):
     try:
-        return isinstance(s, basestring)
+        return isinstance(s, (str, bytes))
     except NameError:
         return isinstance(s, str)
 
 
 class FeedDiasp:
     def __init__(self, pod, username, password, db, parser, keywords=None, hashtags=None, append=None):
-        # UnicodeEncodeError Workaround
-        reload(sys);
-        sys.setdefaultencoding("utf8")
-
         # Feed
         self.feed = parser
 
@@ -58,7 +49,7 @@ def publish(self):
             self.diasp.login()
         for post in posts:
             if not self.db.is_published(post['id']):
-                print 'Published: ' + post['title'].encode('utf8')
+                print('Published: ' + post['title'])
                 hashtags = self.find_hashtags(post['content'], self.keywords)
                 if self.hashtags is not None:
                     hashtags.extend(self.hashtags)
@@ -71,7 +62,7 @@ def publish(self):
                                     append=self.append)
                     self.db.mark_as_posted(post['id'])
                 except Exception as e:
-                    print 'Failed to publish: ' + str(e)
+                    print('Failed to publish: ' + str(e))
         return True
 
     def format_tag(self, tag):

diff --git a/feeddiasp/PostDBCSV.py b/feeddiasp/PostDBCSV.py
@@ -1,8 +1,7 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 
-from datetime import datetime
 import os
+from datetime import datetime
 
 
 class PostDBCSV:

diff --git a/feeddiasp/RSSParser.py b/feeddiasp/RSSParser.py
@@ -1,12 +1,14 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
+
 import feedparser
-from html2text import html2text
-import os
 import pypandoc
+from html2text import html2text
+
+
 class RSSParser:
     def __init__(self, url):
         self.url = url
+        self.feed = None
 
     def update(self):
         self.feed = feedparser.parse(self.url)
@@ -25,11 +27,11 @@ def get_entries(self):
             new_post['title'] = entry.title if 'title' in entry else ''
             new_post['link'] = entry.link if 'link' in entry else ''
             if 'content' in entry:
-                new_post['content'] = html2markup(entry.content[0].value)  # html2markup() converts HTML to Markup
+                new_post['content'] = html2markdown(entry.content[0].value)
             elif 'summary' in entry:
-                new_post['content'] = html2markup(entry.summary)
+                new_post['content'] = html2markdown(entry.summary)
             elif 'description' in entry:
-                new_post['content'] = html2markup(entry.description)
+                new_post['content'] = html2markdown(entry.description)
             else:
                 new_post['content'] = ''
             # tags
@@ -45,11 +47,15 @@ def get_entries(self):
         return entries
 
 
-def html2markup(text):
+def html2markdown(html: str):
+    """
+    Returns the given HTML as equivalent Markdown-structured text.
+    """
     try:
-        output = pypandoc.convert(text, 'md', format='html')
+        return pypandoc.convert_text(html, 'md', format='html')
     except OSError:
-        # Pandoc not installed. Switching to html2text instead
-        print "Warning: Pandoc not installed. Pandoc is needed to convert HTML-Posts into Markdown. Try sudo apt-get install pandoc."
-        output = html2text(text)
-    return output
+        msg = "It's recommended to install the `pandoc` library for converting " \
+              "HTML into Markdown-structured text. It tends to have better results " \
+              "than `html2text`, which is now used as a fallback."
+        print(msg)
+        return html2text(html)
diff --git a/feeddiasp/__init__.py b/feeddiasp/__init__.py
@@ -1,4 +1,5 @@
-from FBParser import FBParser
-from RSSParser import RSSParser
-from FeedDiasp import FeedDiasp
-from Diasp import Diasp
+from .Diasp import Diasp
+from .FBParser import FBParser
+from .FeedDiasp import FeedDiasp
+from .PostDBCSV import PostDBCSV
+from .RSSParser import RSSParser
diff --git a/requirements.txt b/requirements.txt
@@ -1,9 +1,10 @@
-argparse==1.4.0
+#argparse==1.2.1
+#diaspy==0.3.0
 diaspy-api==0.5.1
-facepy==1.0.9
+facepy==1.0.8
 feedparser==5.2.1
 html2text==2016.9.19
-pypandoc==1.4
-requests==2.18.3
-six==1.10.0
-wsgiref==0.1.2
+pypandoc==1.3.3
+#requests==2.9.1
+#six==1.10.0
+#wsgiref==0.1.2
diff --git a/tests.py b/tests.py
@@ -0,0 +1,33 @@
+import os
+from unittest import TestCase
+
+from feeddiasp import Diasp, RSSParser
+
+RSS_FEED_URL = "http://www.spiegel.de/schlagzeilen/tops/index.rss"
+
+
+class DiaspClientTestCase(TestCase):
+    def setUp(self):
+        self.pod = os.environ['FEEDDIASP_TEST_POD']
+        self.username = os.environ['FEEDDIASP_TEST_USERNAME']
+        self.password = os.environ['FEEDDIASP_TEST_PASSWORD']
+
+    def test_login(self):
+        client = Diasp(
+            pod=self.pod,
+            username=self.username,
+            password=self.password)
+
+        try:
+            client.login()
+        except Exception as e:
+            self.fail(e)
+
+
+class RSSParserTestCase(TestCase):
+    def test_get_posts(self):
+        rss = RSSParser(url=RSS_FEED_URL)
+        rss.update()
+        posts = rss.get_entries()
+
+        self.assertTrue(len(posts) > 0)