-
Notifications
You must be signed in to change notification settings - Fork 4
/
goodreads.py
123 lines (95 loc) · 3.76 KB
/
goodreads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python
# -*- Mode: Python; coding: utf-8; indent-tabs-mode: nil; tab-width: 4 -*-
"""
Variety quote plugin sourcing quotes from goodreads.com
This script is placed in '~/.config/variety/plugins' and then activated from inside Variety's
Preferences Quotes menu
"""
import logging
import random
from locale import gettext as _
from variety.Util import Util
from variety.plugins.IQuoteSource import IQuoteSource
# Module-level logger obtained under the name "variety"
# (presumably the host application's logger name - confirm).
logger = logging.getLogger("variety")
class GoodreadsSource(IQuoteSource):
    """
    Retrieves quotes from goodreads.com. Reads the popular quotes.

    Attributes:
        quotes(list): list containing the quotes; each entry is a dict with
            the keys "quote", "author", "sourceName" and "link"
    """

    # Listing that is scraped; individual pages are selected with ?page=N.
    BASE_URL = 'https://www.goodreads.com/quotes'
    # Number of listing pages (1..PAGE_COUNT) requested on every fetch.
    PAGE_COUNT = 19
    # Unicode horizontal bar that separates the quote text from the author.
    AUTHOR_SEPARATOR = u"\u2015"

    def __init__(self):
        # super() must name this class (not the base class), otherwise the
        # base class' own initialiser is skipped in the MRO.
        super(GoodreadsSource, self).__init__()
        self.quotes = []

    @classmethod
    def get_info(cls):
        """Return the plugin metadata (name, description, author, version)."""
        return {
            "name": "Goodreads",
            "description": _("Popular quotes from goodreads.com"),
            "author": "Denis Mach",
            "version": "0.1",
        }

    def supports_search(self):
        """Searching by author or keyword is not implemented by this source."""
        return False

    def activate(self):
        """Mark the plugin active and download the quotes (no-op if active)."""
        if self.active:
            return
        self.active = True
        self.quotes = []
        self.fetch_goodreads_quotes()

    def deactivate(self):
        """Drop the downloaded quotes and mark the plugin inactive."""
        self.quotes = []
        self.active = False

    def fetch_goodreads_quotes(self):
        """
        Replace self.quotes with the quotes scraped from the listing pages.

        Requests pages 1..PAGE_COUNT of BASE_URL and turns every
        <div class="quoteText"> element into a quote dict. A warning is
        logged when no quote was found at all. Errors raised by
        Util.html_soup are not caught here.
        """
        self.quotes = []

        # iterate through goodreads pagination
        for page in range(1, self.PAGE_COUNT + 1):
            url = self.BASE_URL + '?page=' + str(page)
            soup = Util.html_soup(url)

            # this is the element that contains the quote text
            for tag in soup.find_all("div", {"class": "quoteText"}):
                fragments = []
                # ignore the last child, it contains the author info
                self._create_quote_fragments(fragments, tag.contents[:-1])
                self.quotes.append(self.assemble_quote(tag, fragments, url))

        if not self.quotes:
            logger.warning("Could not find quotes for URL %s", self.BASE_URL)

    def _create_quote_fragments(self, quoteFragments, quote_stripped_author):
        """
        Append the stripped text of every node to quoteFragments (in place).

        The quote might be separated by several <br>, so the children have
        to be traversed to gather up the quote text pieces.

        Args:
            quoteFragments(list): output list that receives the text pieces
            quote_stripped_author(iterable): child nodes of the quote
                element, without the trailing author node
        """
        for piece in quote_stripped_author:
            try:
                fragment = piece.text.strip()
            except AttributeError:
                # no usable .text attribute: treat the node itself as a string
                fragment = piece.strip()
            quoteFragments.append(fragment)

    @staticmethod
    def _to_ascii(text):
        """Drop every non-ASCII character from text and return text again."""
        # encode() alone yields bytes on Python 3; decode to stay textual
        return text.encode("ascii", "ignore").decode("ascii")

    def assemble_quote(self, tag, quoteFragments, url):
        """
        Build the quote dict for one quote element.

        Args:
            tag: the quote element; searched for the author link
            quoteFragments(list): text pieces collected from the element
            url(str): page the quote was found on, stored as "link"

        Returns:
            dict: {"quote", "author", "sourceName", "link"}; "author" is
            None when the element has no author link
            (NOTE(review): confirm the consumer accepts a None author).
        """
        # there is a unicode hyphen that separates the quote and author
        # we want to extract only the quote by splitting
        joined = " ".join(quoteFragments)
        quote_part = joined.split(self.AUTHOR_SEPARATOR)[0]

        # this gets rid of the unicode quotations and stuff
        # TODO need an algo to detect multiple quotes to ignore their removal
        quote_text = ' '.join(self._to_ascii(quote_part).split())

        # a missing author link must not abort the whole fetch
        author = None
        author_elem = tag.find("a", {"class": "authorOrTitle"})
        if author_elem is not None and author_elem.contents:
            author = self._to_ascii(author_elem.contents[0].strip())

        return {
            "quote": quote_text,
            "author": author,
            "sourceName": "goodreads",
            "link": url,
        }

    def get_for_author(self, author):
        """Not supported by this source; always returns an empty list."""
        return []

    def get_for_keyword(self, keyword):
        """Not supported by this source; always returns an empty list."""
        return []

    def get_random(self):
        """Return a one-element list with a random quote, or [] if none."""
        if not self.quotes:
            return []
        return [random.choice(self.quotes)]