Skip to content

Commit

Permalink
enhanced anti-anti-crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Aug 13, 2024
1 parent 5b066ab commit 9048b25
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from application.utils import loc_exc_pos

class RecipeDisabled(Exception):
    """Exception subclass that adds no state or behavior of its own.

    NOTE(review): judging by the name, this is meant to signal that a recipe
    is disabled; the raise site is not visible here -- confirm against callers.
    """
Expand Down Expand Up @@ -85,12 +86,8 @@ def convert(self, recipes, opts, file_ext, log, output_dir, fs):
ro.masthead_url = None
indexFile = ro.download()
except Exception as e:
if str(e) == 'No articles downloaded, aborting':
msg = f'Recipe "{recipe.title}": {e}'
else:
msg = f'Failed to execute recipe "{recipe.title}": {e}'
log.warning(msg)
log.debug(traceback.format_exc())
log.warning(loc_exc_pos(f'Failed to execute recipe "{recipe.title}"'))
#log.debug(traceback.format_exc())
continue

if indexFile and ro.feed_objects:
Expand Down
9 changes: 7 additions & 2 deletions application/lib/urlopener.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,15 @@ def __init__(self, *, host=None, timeout=60, headers=None, file_stub=None, user_
self.host = host
self.timeout = timeout or 60
# addheaders is deliberately not a dict, to stay compatible with the mechanize interface
self.addheaders = [ #下面的代码假定第一个元素为 'User-Agent'
# For the convenience of the code below, the first element must be 'User-Agent'
self.addheaders = [
('User-Agent', "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)"),
#('User-Agent', "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"), #IE11
#('User-Agent', "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
("Accept-Encoding", "gzip, deflate"),]
("Accept-Encoding", "gzip, deflate"),
('Accept-Language', "en,*"),
]
headers = headers.items() if isinstance(headers, dict) else (headers or [])
self.addheaders.extend([(key, value) for key, value in headers])
if user_agent:
Expand Down

0 comments on commit 9048b25

Please sign in to comment.