
Commit

^q^
KurtBestor committed Jul 24, 2023
1 parent f3a5236 commit d61601f
Showing 47 changed files with 749 additions and 319 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -10,7 +10,7 @@
## Links
- [Download](https://github.com/KurtBestor/Hitomi-Downloader/releases/latest)
- [Issues](https://github.com/KurtBestor/Hitomi-Downloader/issues)
- [Scripts](https://github.com/KurtBestor/Hitomi-Downloader/wiki/Scripts)
- [Scripts & Plugins](https://github.com/KurtBestor/Hitomi-Downloader/wiki/Scripts-&-Plugins)
- [Chrome Extension](https://github.com/KurtBestor/Hitomi-Downloader/wiki/Chrome-Extension)

## Demo
@@ -60,8 +60,9 @@
| **Iwara** | <https://iwara.tv><br><https://ecchi.iwara.tv> |
| **Jmana** | <https://jmana.net> |
| **カクヨム** | <https://kakuyomu.jp> |
| **Likee** | <https://likee.video> |
| **Luscious** | <https://luscious.net> |
| **Mastodon** | <https://mastodon.social> |
| **Misskey** | <https://misskey.io> |
| **MyReadingManga** | <https://myreadingmanga.info> |
| **Naver Blog** | <https://blog.naver.com> |
| **Naver Cafe** | <https://cafe.naver.com> |
@@ -82,6 +83,7 @@
| **Sankaku Complex** | <https://www.sankakucomplex.com><br><https://chan.sankakucomplex.com><br><https://idol.sankakucomplex.com> |
| **Soundcloud** | <https://soundcloud.com> |
| **小説家になろう** | <https://syosetu.com> |
| **TikTok** | <https://tiktok.com><br><https://douyin.com>|
| **TOKYO Motion** | <https://tokyomotion.net> |
| **Tumblr** | <https://tumblr.com> |
| **Twitch** | <https://twitch.tv> |
11 changes: 8 additions & 3 deletions src/extractor/afreeca_downloader.py
@@ -22,6 +22,11 @@ def __init__(self, stream, referer, id, title, url_thumb):
downloader.download(url_thumb, buffer=self.thumb)


class LoginRequired(errors.LoginRequired):
def __init__(self, *args):
super().__init__(*args, method='browser', url='https://login.afreecatv.com/afreeca/login.php')



class Downloader_afreeca(Downloader):
type = 'afreeca'
@@ -61,11 +66,11 @@ def get_video(url, session, cw):

html = downloader.read_html(url, session=session)
if "document.location.href='https://login." in html:
raise errors.LoginRequired()
raise LoginRequired()
if len(html) < 2000:
alert = re.find(r'''alert\(['"](.+?)['"]\)''', html)
if alert:
raise errors.LoginRequired(alert)
raise LoginRequired(alert)
soup = Soup(html)

url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
@@ -85,7 +90,7 @@ def get_video(url, session, cw):
title = data['full_title']

if data.get('adult_status') == 'notLogin':
raise errors.LoginRequired(title)
raise LoginRequired(title)

urls_m3u8 = []
for file in data['files']:
3 changes: 1 addition & 2 deletions src/extractor/baraag_downloader.py
@@ -28,7 +28,6 @@ def read(self):
id_ = get_id(self.url)
info = get_info('baraag.net', id_, f'baraag_{id_}', self.session, self.cw)

for img in info['imgs']:
self.urls.append(img.url)
self.urls += info['files']

self.title = clean_title('{} (baraag_{})'.format(info['title'], id_))
4 changes: 2 additions & 2 deletions src/extractor/etc_downloader.py
@@ -120,7 +120,7 @@ def _get_video(url, session, cw, ie_key=None, allow_m3u8=True):
fs = []
for i, f in enumerate(formats):
f['_index'] = i
f['_resolution'] = int_or_none(re.find('([0-9]+)p', f['format'], re.IGNORECASE)) or f.get('height') or f.get('width') or int_or_none(f.get('quality')) or int(f.get('vcodec', 'none') != 'none') #5995
f['_resolution'] = int_or_none(re.find(r'([0-9]+)p', f['format'], re.I)) or f.get('height') or f.get('width') or int_or_none(f.get('quality')) or int(f.get('vcodec', 'none') != 'none') #5995
f['_vbr'] = f.get('vbr') or 0
f['_audio'] = f.get('abr') or f.get('asr') or int(f.get('acodec', 'none') != 'none')
print_(format_(f))
@@ -144,7 +144,7 @@ def filter_f(fs):
print_('invalid url: {}'.format(f['url']))
return list(fs)[0]#

f_video = filter_f(reversed(sorted(fs, key=lambda f:(f['_resolution'], f['_vbr'], f['_index']))))
f_video = filter_f(sorted(fs, key=lambda f:(f['_resolution'], int(bool(f['_audio'])), f['_vbr'], f['_index']), reverse=True)) #6072, #6118
print_('video0: {}'.format(format_(f_video)))

if f_video['_audio']:
4 changes: 2 additions & 2 deletions src/extractor/fc2_downloader.py
@@ -14,8 +14,8 @@ class Downloader_fc2(Downloader):

@classmethod
def fix_url(cls, url):
if not re.match('https?://.+', url, re.IGNORECASE):
url = 'https://video.fc2.com/content/{}'.format(url)
if not re.match(r'https?://.+', url, re.I):
url = f'https://video.fc2.com/content/{url}'
return url

@classmethod
2 changes: 1 addition & 1 deletion src/extractor/iwara_downloader.py
@@ -126,7 +126,7 @@ def f(html, browser=None):
if time() - t0 > 10 or '/profile/' in url.lower():
for a in soup.findAll('a'):
if urljoin(url, a.get('href', '')) == urljoin(url, '/login'):
raise errors.LoginRequired(method='browser', url='https://www.iwara.tv/login') #5794
raise errors.LoginRequired(method='browser', url='https://www.iwara.tv/login', cookie=False) #5794
buttons = soup.findAll(class_='button--primary')
if buttons:
for i, button in enumerate(buttons):
18 changes: 8 additions & 10 deletions src/extractor/jmana_downloader.py
@@ -9,7 +9,7 @@
import bs4
import clf2
PATTERN = r'jmana[0-9]*.*/(comic_list_title|book)\?book'
PATTERN_ALL = r'jmana[0-9]*.*/(comic_list_title|book|bookdetail)\?book'
PATTERN_ALL = r'jmana[0-9]*.*/((comic_list_title|book|bookdetail)\?book|book_by_title\?title)' #6157
PATTERN_ID = '[?&]bookdetailid=([0-9]+)'


@@ -56,7 +56,7 @@ def init(self):
self.url = self.fix_url(url)
self._soup = None

for i, page in enumerate(get_pages(self.url, self.session, self.soup)):
for i, page in enumerate(get_pages(self.url, self.soup, self.session)):
if page.id == int(op['value']):
break
else:
@@ -147,10 +147,7 @@ def get_imgs_page(page, referer, session, cw=None):
return imgs


def get_pages(url, session=None, soup=None):
if soup is None:
res = clf2.solve(url, session=session) #4070
soup = Soup(res['html'])
def get_pages(url, soup, session):
pages = []
for inner in soup.findAll('div', class_='inner'):
a = inner.find('a')
@@ -172,12 +169,13 @@


@page_selector.register('jmana')
@try_n(4)
def f(url):
def f(url, win):
if re.search(PATTERN_ID, url):
raise Exception(tr_('목록 주소를 입력해주세요'))
session = Session()
pages = get_pages(url, session=session)
res = clf2.solve(url, session=session, win=win) #4070
soup = Soup(res['html'])
pages = get_pages(url, soup, session)
return pages


@@ -186,7 +184,7 @@
if soup is None:
html = downloader.read_html(url, session=session)
soup = Soup(html)
pages = get_pages(url, soup=soup)
pages = get_pages(url, soup, session)
print_('pages: {}'.format(len(pages)))
pages = page_selector.filter(pages, cw)
imgs = []
42 changes: 14 additions & 28 deletions src/extractor/lhscan_downloader.py
@@ -36,10 +36,10 @@ def __init__(self, title, url):
self.url = url


def get_soup_session(url, cw=None):
def get_soup_session(url, cw=None, win=None):
print_ = get_print(cw)
session = Session()
res = clf2.solve(url, session=session, cw=cw)
res = clf2.solve(url, session=session, cw=cw, win=win)
print_('{} -> {}'.format(url, res['url']))
if res['url'].rstrip('/') == 'https://welovemanga.one':
raise errors.LoginRequired()
@@ -55,10 +55,9 @@ class Downloader_lhscan(Downloader):
]
MAX_CORE = 16
display_name = 'LHScan'
_soup = None

def init(self):
self._soup, self.session = get_soup_session(self.url, self.cw)
self.soup, self.session = get_soup_session(self.url, self.cw)
if not self.soup.find('ul', class_='manga-info'):
raise errors.Invalid('{}: {}'.format(tr_('목록 주소를 입력해주세요'), self.url))

@@ -68,21 +67,6 @@ def fix_url(cls, url):
url = url.replace('welovemanga.net', 'welovemanga.one') #4298
return url

@property
def soup(self):
if self._soup is None:
for try_ in range(8):
try:
html = downloader.read_html(self.url, session=self.session)
break
except Exception as e:
e_ = e
print(e)
else:
raise e_
self._soup = Soup(html)
return self._soup

@property
def name(self):
title = self.soup.find('ul', class_='manga-info').find('h3').text
@@ -115,10 +99,10 @@ def get_imgs_page(page, referer, session, cw=None):
pass
soup = Soup(html)

view = soup.find('div', class_='chapter-content')

if not view:
raise Exception('no chapter-content')
cid = re.find(r'''load_image\(([0-9]+)''', html)
if cid: #6186
url_api = urljoin(page.url, f'/app/manga/controllers/cont.listImg.php?cid={cid}')
soup = downloader.read_soup(url_api, page.url, session=session)

imgs = []
for img in soup.findAll('img', class_='chapter-img'):
@@ -140,9 +124,12 @@ def get_imgs_page(page, referer, session, cw=None):
continue
if '/uploads/lazy_loading.gif' in src:
continue
if '/xstaff.jpg.pagespeed.ic.gPQ2SGcYaN.webp' in src:
continue
src = src.replace('\n', '').replace('\r', '') #5238
if 'proxy.php?link=' not in src: #5351
src = 'https://welovekai.com/proxy.php?link=' + src #5238
#6105
## if 'proxy.php?link=' not in src: #5351
## src = 'https://welovekai.com/proxy.php?link=' + src #5238
if not imgs:
print_(src0)
print_(src)
@@ -174,9 +161,8 @@ def get_pages(url, session, soup=None, cw=None):


@page_selector.register('lhscan')
@try_n(4)
def f(url):
soup, session = get_soup_session(url)
def f(url, win):
soup, session = get_soup_session(url, win=win)
pages = get_pages(url, session, soup=soup)
return pages

117 changes: 0 additions & 117 deletions src/extractor/likee_downloader.py

This file was deleted.

