Skip to content

Commit

Permalink
add wallabag, chatgpt translator
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Apr 7, 2024
1 parent a2fc2c5 commit f72a19b
Show file tree
Hide file tree
Showing 33 changed files with 1,068 additions and 722 deletions.
48 changes: 24 additions & 24 deletions application/lib/calibre/web/feeds/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ def _postprocess_html(self, soup, first_fetch, job_info):

#If translation is needed, the translator property is set by WorkerImpl
if (getattr(self, 'translator', None) or {}).get('enable'):
self.translate_html(soup)
self.translate_html(soup, title)

if job_info:
try:
Expand All @@ -1098,7 +1098,7 @@ def append_share_links(self, soup, url):

shareLinks = self.user.share_links
aTags = []
for type_ in ['Evernote', 'Wiz', 'Pocket', 'Instapaper']:
for type_ in ['Evernote', 'Wiz', 'Pocket', 'Instapaper', 'wallabag']:
if shareLinks.get(type_, {}).get('enable'):
ashare = soup.new_tag('a', href=self.make_share_link(type_, self.user, url, soup))
ashare.string = _('Save to {}').format(type_)
Expand All @@ -1125,7 +1125,7 @@ def append_share_links(self, soup, url):

#生成保存内容或分享文章链接的KindleEar调用链接
def make_share_link(self, shareType, user, url, soup):
share_key = user.share_links.get('key', '123')
share_key = user.share_links.get('key', '')
titleTag = soup.find('title')
title = titleTag.string if titleTag else 'Untitled'
appDomain = os.getenv('APP_DOMAIN')
Expand All @@ -1135,6 +1135,8 @@ def make_share_link(self, shareType, user, url, soup):
href = f'{appDomain}/share?act=Pocket&u={user.name}&t={title}&k={share_key}&url={quote(url)}'
elif shareType == 'Instapaper':
href = f'{appDomain}/share?act=Instapaper&u={user.name}&t={title}&k={share_key}&url={quote(url)}'
elif shareType == 'wallabag':
href = f'{appDomain}/share?act=wallabag&u={user.name}&t={title}&k={share_key}&url={quote(url)}'
elif shareType == 'Weibo':
href = f'https://service.weibo.com/share/share.php?url={quote(url)}'
elif shareType == 'Facebook':
Expand Down Expand Up @@ -1959,9 +1961,10 @@ def internal_postprocess_book(self, oeb, opts, log):
seen.add(url)

#调用在线翻译服务平台,翻译html
def translate_html(self, soup):
def translate_html(self, soup, title):
    """Translate the article HTML in *soup* in place via the configured online translation service.

    :param soup: BeautifulSoup document of the fetched article
    :param title: article title, used only for the progress log line
    """
    from ebook_translator import HtmlTranslator
    self.log.debug(f'Translating [{title}]')
    html_translator = HtmlTranslator(self.translator, self.simultaneous_downloads)
    html_translator.translate_soup(soup)

#翻译Feed的title,toc时用到
Expand Down Expand Up @@ -2074,7 +2077,7 @@ def parse_feeds(self):

feeds = []
id_counter = 0
added = set();
added = set()
for obj in main_urls:
main_title, main_url = (self.title, obj) if isinstance(obj, str) else obj
feed = Feed()
Expand Down Expand Up @@ -2114,10 +2117,6 @@ def parse_feeds(self):

return feeds

#在一个soup对象中查找所有满足条件的tag
def _soup_find_all(self, tag, rule):
return tag.find_all(**rule) if isinstance(rule, dict) else tag.select(rule)

#从一个网页中根据指定的规则,提取文章链接
def extract_urls(self, main_title, main_url):
resp = self.browser.open(main_url, timeout=self.timeout)
Expand All @@ -2128,14 +2127,9 @@ def extract_urls(self, main_title, main_url):
soup = BeautifulSoup(resp.text, 'lxml')

articles = []
for rule in self.url_extract_rules:
resultTags = self._soup_find_all(soup, rule[0])
for flt in rule[1:]:
resultTags = [self._soup_find_all(tag, flt) for tag in resultTags]
resultTags = [tag for sublist in resultTags for tag in sublist] #二级列表展开为一级列表

for item in resultTags:
#如果最终tag不是链接,则在子节点中查找
for rules in self.url_extract_rules:
for item in self.get_tags_from_rules(soup, rules):
#如果最终tag不是链接,则在子节点中查找,并且添加所有找到的链接
item = item.find_all('a') if item.name.lower() != 'a' else [item]
for tag in item:
title = ' '.join(tag.stripped_strings) or main_title
Expand All @@ -2160,17 +2154,23 @@ def preprocess_raw_html(self, raw_html, url):
return raw_html

newBody = soup.new_tag('body')
for rule in self.content_extract_rules:
resultTags = self._soup_find_all(soup, rule[0])
for flt in rule[1:]:
resultTags = [self._soup_find_all(tag, flt) for tag in resultTags]
resultTags = [tag for sublist in resultTags for tag in sublist] #二级列表展开为一级列表

newBody.extend(resultTags)
for rules in self.content_extract_rules:
newBody.extend(self.get_tags_from_rules(soup, rules))

oldBody.replace_with(newBody)
return str(soup)

#Extract the list of tags matching a rule chain from a soup object.
#rules: a list of dicts (Tag-attribute filters) or a list of strings (CSS selectors)
def get_tags_from_rules(self, soup, rules):
    """Return the tags in *soup* selected by the rule chain.

    :param soup: a BeautifulSoup object (anything supporting find_all/select)
    :param rules: either a list of dicts — the first is applied as
        ``soup.find_all(**rule)`` and each subsequent dict filters inside the
        tags found by the previous step — or a list of strings, which are
        joined with spaces into a single CSS descendant selector.
    :return: a flat list of matching tags.
    """
    if isinstance(rules[0], dict): #dict rules: chained find_all() filtering
        resultTags = soup.find_all(**rules[0])
        for flt in rules[1:]: #no enumerate(): the index was never used
            nested = [tag.find_all(**flt) for tag in resultTags]
            resultTags = [tag for sublist in nested for tag in sublist] #flatten nested lists
    else: #CSS selector; NOTE(review): total selector length limit is stated as 1366 chars — confirm
        resultTags = soup.select(' '.join(rules))
    return resultTags

class CalibrePeriodical(BasicNewsRecipe):

Expand Down
11 changes: 6 additions & 5 deletions application/lib/ebook_translator/engines/baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
import json
import random
import hashlib

from urllib.parse import urljoin
from .base import Base
from .languages import baidu

class BaiduTranslate(Base):
name = 'Baidu'
alias = 'Baidu'
lang_codes = Base.load_lang_codes(baidu)
endpoint = 'https://fanyi-api.baidu.com/api/trans/vip/translate'
default_api_host = 'https://fanyi-api.baidu.com'
endpoint = '/api/trans/vip/translate'
api_key_hint = 'appid|appkey'
api_key_pattern = r'^[^\s:\|]+?[:\|][^\s:\|]+$'
api_key_errors = ['54004']
Expand All @@ -34,7 +35,7 @@ def translate(self, text):
'salt': salt,
'sign': sign
}

return self.get_result(
self.endpoint, data, headers, method='POST',
endpoint = urljoin(self.api_host or self.default_api_host, self.endpoint)
return self.get_result(endpoint, data, headers, method='POST',
callback=lambda r: json.loads(r)['trans_result'][0]['dst'])
12 changes: 7 additions & 5 deletions application/lib/ebook_translator/engines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import ssl
import os.path
import traceback

from urllib.parse import urljoin
from urlopener import UrlOpener

class Base:
Expand All @@ -12,6 +12,7 @@ class Base:
lang_codes = {}
endpoint = None
need_api_key = True
default_api_host = ''
api_key_hint = _('API Keys')
api_key_pattern = r'^[^\s]+$'
api_key_errors = ['401']
Expand All @@ -25,7 +26,7 @@ class Base:
request_timeout = 10.0
max_error_count = 10

def __init__(self):
def __init__(self, config=None):
self.source_lang = None #语种显示的名字
self.target_lang = None
self.source_code = None #语种代码
Expand All @@ -35,7 +36,7 @@ def __init__(self):

self.merge_enabled = False

self.set_config()
self.set_config(config)

@classmethod
def load_lang_codes(cls, codes):
Expand Down Expand Up @@ -81,6 +82,7 @@ def get_iso639_target_code(cls, lang):
def set_config(self, config=None):
self.config = config or {}
self.api_keys = self.config.get('api_keys', [])[:]
self.api_host = self.config.get('api_host', self.default_api_host)
self.bad_api_keys = []
self.api_key = self._get_api_key()

Expand Down Expand Up @@ -195,8 +197,8 @@ def get_result(self, url, data=None, headers=None, method='GET',
if resp.status_code == 200:
text = []
if stream:
for line in resp.iter_content(chunk_size=None, decode_unicode=True):
text.append(line)
for line in resp.iter_content(chunk_size=None):
text.append(line if isinstance(line, str) else line.decode('utf-8'))
text = ''.join(text)
else:
text = resp.text
Expand Down
46 changes: 21 additions & 25 deletions application/lib/ebook_translator/engines/chatgpt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json

from urllib.parse import urljoin
from .base import Base
from .languages import google

Expand All @@ -12,8 +12,9 @@ class ChatgptTranslate(Base):
name = 'ChatGPT'
alias = 'ChatGPT (OpenAI)'
lang_codes = Base.load_lang_codes(google)
endpoint = 'https://api.openai.com/v1/chat/completions'
# api_key_hint = 'sk-xxx...xxx'
default_api_host = 'https://api.openai.com'
endpoint = '/v1/chat/completions'
api_key_hint = 'sk-xxx...xxx'
# https://help.openai.com/en/collections/3808446-api-error-codes-explained
api_key_errors = ['401', 'unauthorized', 'quota']

Expand All @@ -39,8 +40,8 @@ class ChatgptTranslate(Base):
top_p = 1
stream = True

def __init__(self):
Base.__init__(self)
def __init__(self, config=None):
Base.__init__(self, config)
self.endpoint = self.config.get('endpoint', self.endpoint)
self.prompt = self.config.get('prompt', self.prompt)
if self.model is None:
Expand Down Expand Up @@ -91,9 +92,9 @@ def translate(self, text):
data = self._get_data(text)
sampling_value = getattr(self, self.sampling)
data.update({self.sampling: sampling_value})
endpoint = urljoin(self.api_host or self.default_api_host, self.endpoint)

return self.get_result(
self.endpoint, json.dumps(data), self._get_headers(),
return self.get_result(endpoint, json.dumps(data), self._get_headers(),
method='POST', stream=self.stream, callback=self._parse)

def _parse(self, data):
Expand All @@ -102,30 +103,25 @@ def _parse(self, data):
return json.loads(data)['choices'][0]['message']['content']

def _parse_stream(self, data):
while True:
try:
line = data.readline().decode('utf-8').strip()
except IncompleteRead:
ret = []
for line in data.split('\n'):
line = line.strip()
if not line or not line.startswith('data:'):
continue
except Exception as e:
raise Exception(
_('Can not parse returned response. Raw data: {}')
.format(str(e)))
if line.startswith('data:'):
chunk = line.split('data: ')[1]
if chunk == '[DONE]':
break
delta = json.loads(chunk)['choices'][0]['delta']
if 'content' in delta:
yield str(delta['content'])

chunk = line.split('data: ')[1].strip()
if chunk == '[DONE]':
break
delta = json.loads(chunk)['choices'][0]['delta']
if 'content' in delta:
ret.append(str(delta['content']))
return ''.join(ret)

class AzureChatgptTranslate(ChatgptTranslate):
name = 'ChatGPT(Azure)'
alias = 'ChatGPT (Azure)'
endpoint = (
'$AZURE_OPENAI_ENDPOINT/openai/deployments/gpt-35-turbo/chat/'
'completions?api-version=2023-05-15')
default_api_host = ''
endpoint = '/openai/deployments/gpt-35-turbo/chat/completions?api-version=2023-05-15'
model = None

def _get_headers(self):
Expand Down
15 changes: 8 additions & 7 deletions application/lib/ebook_translator/engines/deepl.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import json
import time
import random

from urllib.parse import urljoin
from .base import Base
from .languages import deepl

class DeeplTranslate(Base):
name = 'DeepL'
alias = 'DeepL'
lang_codes = Base.load_lang_codes(deepl)
default_api_host = 'https://api-free.deepl.com'
endpoint = {
'translate': 'https://api-free.deepl.com/v2/translate',
'usage': 'https://api-free.deepl.com/v2/usage',
'translate': '/v2/translate',
'usage': '/v2/usage',
}
# api_key_hint = 'xxx-xxx-xxx:fx'
placeholder = ('<m id={} />', r'<m\s+id={}\s+/>')
Expand All @@ -20,8 +21,8 @@ class DeeplTranslate(Base):
def get_usage(self):
# See: https://www.deepl.com/docs-api/general/get-usage/
headers = {'Authorization': 'DeepL-Auth-Key %s' % self.api_key}
usage = self.get_result(
self.endpoint.get('usage'), headers=headers, silence=True,
endpoint = urljoin(self.api_host or self.default_api_host, self.endpoint.get('usage'))
usage = self.get_result(endpoint, headers=headers, silence=True,
callback=lambda r: json.loads(r))
if usage is None:
return None
Expand All @@ -42,8 +43,8 @@ def translate(self, text):
if not self._is_auto_lang():
data.update(source_lang=self._get_source_code())

return self.get_result(
self.endpoint.get('translate'), data, headers, method='POST',
endpoint = urljoin(self.api_host or self.default_api_host, self.endpoint.get('translate'))
return self.get_result(endpoint, data, headers, method='POST',
callback=lambda r: json.loads(r)['translations'][0]['text'])


Expand Down
Loading

0 comments on commit f72a19b

Please sign in to comment.