Skip to content
This repository has been archived by the owner on Oct 23, 2019. It is now read-only.

Commit

Permalink
Merge pull request #13 from Girbons/development
Browse files Browse the repository at this point in the history
Added support for mangahere
  • Loading branch information
Girbons authored Oct 31, 2018
2 parents 124006c + e7edba8 commit 6245a83
Show file tree
Hide file tree
Showing 13 changed files with 120 additions and 17 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
### 0.5 [2018-10-31]

* Added support for http://www.mangahere.cc/

### 0.4.1 [2018-10-30]

* updated requests (#8 -- thanks @michaelbukachi)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

- http://www.comicextra.com/
- https://www.mangareader.net/
- http://www.mangahere.cc/
- http://readcomiconline.to/

Didn't find your site? Open an issue and it will be added or submit a pull request.
Expand Down
2 changes: 1 addition & 1 deletion comics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__title__ = 'comics-downloader'
__autor__ = 'Alessandro De Angelis'
__version__ = '0.4.1'
__version__ = '0.5'
9 changes: 4 additions & 5 deletions comics/core/comics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,16 @@

import requests
import img2pdf
import validators

from bs4 import BeautifulSoup
from natsort import natsorted
from tqdm import tqdm
from natsort import natsorted

from .scraper import Scraper

from ..compat import TemporaryDirectory
from ..settings import comics_settings
from ..utils import create_and_change_dir
from ..utils import create_and_change_dir, is_url_valid


class BaseComics(object):
Expand All @@ -40,7 +39,7 @@ def images_links(self, response):
"""
:param response: is the response instance.
from a response contet extract images link
from a response content extract images link
and return a list of link
exclude .gif
"""
Expand All @@ -49,7 +48,7 @@ def images_links(self, response):
links = []

for link in match:
if not link.endswith('.gif') and validators.url(link):
if is_url_valid(link):
links.append(link)

return links
Expand Down
8 changes: 5 additions & 3 deletions comics/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@


DEFAULT_SETTINGS = {
'comicextra.com': 'comics.sites.comicextra.ComicExtra',
'mangareader.net': 'comics.sites.mangareader.MangaReader',
'readcomiconline.to': 'comics.sites.readcomiconline.ReadComicOnline',
'comicextra.com': 'comics.sites.ComicExtra',
'mangahere.cc': 'comics.sites.MangaHere',
'mangareader.net': 'comics.sites.MangaReader',
'readcomiconline.to': 'comics.sites.ReadComicOnline',

# is the path where you will find all your downloaded comics
# divided by domain
Expand All @@ -18,6 +19,7 @@
IMPORT_STRINGS = (
'comicextra.com',
'mangareader.net',
'mangahere.cc',
'readcomiconline.to',
)

Expand Down
5 changes: 5 additions & 0 deletions comics/sites/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .comicextra import ComicExtra # noqa
from .mangahere import MangaHere # noqa
from .mangareader import MangaReader # noqa
from .readcomiconline import ReadComicOnline # noqa
from .readcomicsio import ReadComics # noqa
46 changes: 46 additions & 0 deletions comics/sites/mangahere.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import re

import requests

from bs4 import BeautifulSoup

from ..core.comics import BaseComics
from ..utils import is_url_valid


class MangaHere(BaseComics):
    """
    Comics downloader for http://www.mangahere.cc/

    Expects chapter URLs of the form
    http://www.mangahere.cc/manga/<name>/<chapter>/ so the manga name
    and issue number can be read from fixed URL segments.
    """
    def __init__(self, url):
        """
        :param string url: the chapter URL to download from.
        """
        self.base_url = 'http://www.mangahere.cc/'
        # captures the src attribute of any <img> tag
        self._image_regex = r'<img[^>]+src="([^">]+)"'
        self.antibot = False
        super(MangaHere, self).__init__(url)

    @property
    def name(self):
        # URL segment right after '/manga/' (index 4 of the split URL)
        return self.splitted_url[4]

    @property
    def issue_number(self):
        # chapter segment, e.g. 'c048'
        return self.splitted_url[5]

    def images_links(self, response):
        """
        :param response: is the response instance for the chapter page.
        Each <option> in the page points to a single-image page; visit
        every one and collect the comic image URL it contains.
        Returns the list of valid image links.
        """
        session = requests.Session()
        soup = BeautifulSoup(response.content, 'html.parser')

        # retrieve the <option>s in page; each one links to one image page
        links = []
        for option in soup.findAll('option'):
            value = option.get('value')
            # skip <option> tags without a value attribute
            if not value:
                continue
            # page links are protocol-relative ('//...'); only prepend
            # the scheme when it is actually missing, so already-absolute
            # URLs are not mangled into 'http:http://...'
            if value.startswith('//'):
                value = 'http:{}'.format(value)
            links.append(value)

        images_links = []
        for link in links:
            response = session.get(link)
            matches = re.findall(self._image_regex, response.text)
            # the second <img> on the page is the comic image; guard
            # against pages that expose fewer matches
            if len(matches) < 2:
                continue
            image_url = matches[1]
            if is_url_valid(image_url):
                images_links.append(image_url)

        return images_links
4 changes: 3 additions & 1 deletion comics/sites/mangareader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from bs4 import BeautifulSoup

from ..core.comics import BaseComics
from ..utils import is_url_valid


class MangaReader(BaseComics):
Expand Down Expand Up @@ -40,6 +41,7 @@ def images_links(self, response):
response = session.get(link)
# we'll find only 1 image
image_url = re.findall(self._image_regex, response.text)[0]
images_links.append(image_url)
if is_url_valid(image_url):
images_links.append(image_url)

return images_links
14 changes: 14 additions & 0 deletions comics/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os

import validators


def create_and_change_dir(dir_name):
"""
Expand All @@ -11,3 +13,15 @@ def create_and_change_dir(dir_name):
os.mkdir(dir_name)

os.chdir(dir_name)


def is_url_valid(url):
    """
    :param string url: is a url
    Return True when the given url is syntactically valid and does not
    point to a .gif image, False otherwise.
    """
    if url.endswith('.gif'):
        return False

    # validators.url returns True on success and a falsy failure object
    # otherwise; bool() normalizes that to a strict True/False
    return bool(validators.url(url))
10 changes: 10 additions & 0 deletions tests/test_comics_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,13 @@ def test_mangareader():
'comics-download', '-u', 'https://www.mangareader.net/naruto/1'
])
assert os.path.isfile("downloaded_comics/MangaReader/naruto/1.pdf")


def test_mangahere():
    """
    Test download from http://www.mangahere.cc via the CLI entry point.
    """
    url = 'http://www.mangahere.cc/manga/shingeki_no_kyojin_before_the_fall/c048/'
    subprocess.call(['comics-download', '-u', url])

    expected = "downloaded_comics/MangaHere/shingeki_no_kyojin_before_the_fall/c048.pdf"
    assert os.path.isfile(expected)
2 changes: 1 addition & 1 deletion tests/tests_unit/test_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from comics.core.downloader import Downloader
from comics.exceptions import NotSupportedSite

from comics.sites.comicextra import ComicExtra
from comics.sites import ComicExtra
from comics.settings import comics_settings


Expand Down
23 changes: 19 additions & 4 deletions tests/tests_unit/test_sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
import pytest

from comics.core.comics import BaseComics
from comics.sites.readcomiconline import ReadComicOnline
from comics.sites.comicextra import ComicExtra
from comics.sites.readcomicsio import ReadComics
from comics.sites.mangareader import MangaReader

from comics.sites import (
ComicExtra,
ReadComics,
ReadComicOnline,
MangaHere,
MangaReader,
)


def test_base_comic():
Expand Down Expand Up @@ -55,3 +59,14 @@ def test_mangareader():
assert comics.name == 'naruto'
assert comics.issue_number == '1'
assert len(result) == 53


def test_mangahere():
    """
    MangaHere should parse the manga name and chapter from the URL and
    scrape the expected number of image links.
    """
    url = 'http://www.mangahere.cc/manga/shingeki_no_kyojin_before_the_fall/c048/'
    manga = MangaHere(url)

    links = manga.images_links(manga.scraper.scrape_comic(False))

    assert manga.name == 'shingeki_no_kyojin_before_the_fall'
    assert manga.issue_number == 'c048'
    assert len(links) == 132
9 changes: 7 additions & 2 deletions tests/tests_unit/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import os


from comics.utils import create_and_change_dir
from comics.utils import create_and_change_dir, is_url_valid


def test_create_and_change():
    """
    create_and_change_dir should create the directory and chdir into it.
    """
    parent = os.getcwd()
    create_and_change_dir('test_dir')
    current_wd = os.getcwd()
    assert 'test_dir' in current_wd
    # move back out before deleting so the process is never left inside
    # a removed directory (which breaks later os.getcwd() calls), and
    # use rmdir rather than removedirs so empty parent directories are
    # never climbed into and deleted as a side effect
    os.chdir(parent)
    os.rmdir(current_wd)


def test_valid_url():
    """
    is_url_valid accepts well-formed non-gif urls and rejects gifs and
    non-url strings.
    """
    well_formed = 'http://example.com'
    gif = 'http://foo.gif'
    garbage = 'aaaaaa'

    assert is_url_valid(well_formed) is True
    assert is_url_valid(gif) is False
    assert is_url_valid(garbage) is False

0 comments on commit 6245a83

Please sign in to comment.