Skip to content

Commit

Permalink
[Grouple] Fix pages parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Koitharu committed Dec 19, 2024
1 parent 326a3f7 commit 0422517
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 41 deletions.
2 changes: 1 addition & 1 deletion .github/summary.yaml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
total: 1172
total: 1173
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.koitharu.kotatsu.parsers.exception

import okio.IOException
import org.koitharu.kotatsu.parsers.InternalParsersApi
import org.koitharu.kotatsu.parsers.model.MangaSource

Expand All @@ -9,4 +10,4 @@ import org.koitharu.kotatsu.parsers.model.MangaSource
public class AuthRequiredException @InternalParsersApi @JvmOverloads constructor(
public val source: MangaSource,
cause: Throwable? = null,
) : RuntimeException("Authorization required", cause)
) : IOException("Authorization required", cause)
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import org.koitharu.kotatsu.parsers.MangaLoaderContext
import org.koitharu.kotatsu.parsers.MangaSourceParser
import org.koitharu.kotatsu.parsers.config.ConfigKey
import org.koitharu.kotatsu.parsers.exception.AuthRequiredException
import org.koitharu.kotatsu.parsers.exception.ParseException
import org.koitharu.kotatsu.parsers.model.ContentType
import org.koitharu.kotatsu.parsers.model.MangaChapter
import org.koitharu.kotatsu.parsers.model.MangaPage
import org.koitharu.kotatsu.parsers.model.MangaParserSource
import org.koitharu.kotatsu.parsers.util.domain
import org.koitharu.kotatsu.parsers.util.parseFailed
Expand All @@ -20,8 +17,8 @@ internal class AllHentaiParser(
) : GroupleParser(context, MangaParserSource.ALLHENTAI, 1) {

override val configKeyDomain = ConfigKey.Domain(
"z.ahen.me",
"20.allhen.online",
"z.ahen.me",
"24.allhen.online",
"z.allhen.online",
"2023.allhen.online",
Expand All @@ -34,18 +31,6 @@ internal class AllHentaiParser(
return "https://qawa.org/internal/auth/login?targetUri=$targetUri&siteId=1"
}

/**
 * Delegates page loading to the parent parser.
 *
 * A [ParseException] raised by the parent is rethrown unchanged when [isAuthorized] is true;
 * otherwise it is wrapped as the cause of an [AuthRequiredException] for this source.
 */
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> = try {
    super.getPages(chapter)
} catch (parseError: ParseException) {
    throw if (isAuthorized) parseError else AuthRequiredException(source, parseError)
}

override suspend fun getUsername(): String {
val root = webClient.httpGet("https://qawa.org/").parseHtml().body()
val element = root.selectFirst("img.user-avatar") ?: throw AuthRequiredException(source)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import okhttp3.Interceptor
import okhttp3.Response
import okhttp3.internal.closeQuietly
import okhttp3.internal.headersContentLength
import okio.IOException
import org.json.JSONArray
import org.jsoup.nodes.Element
import org.koitharu.kotatsu.parsers.MangaLoaderContext
Expand All @@ -26,6 +27,7 @@ import org.koitharu.kotatsu.parsers.util.*
import org.koitharu.kotatsu.parsers.util.json.getStringOrNull
import org.koitharu.kotatsu.parsers.util.json.mapJSON
import org.koitharu.kotatsu.parsers.util.suspendlazy.suspendLazy
import java.net.HttpURLConnection
import java.text.SimpleDateFormat
import java.util.*

Expand Down Expand Up @@ -111,7 +113,7 @@ internal abstract class GroupleParser(
}

override suspend fun getDetails(manga: Manga): Manga {
val response = webClient.httpGet(manga.url.toAbsoluteUrl(domain)).checkAuthRequired()
val response = webClient.httpGet(manga.url.toAbsoluteUrl(domain))
val doc = response.parseHtml()
val root = doc.body().requireElementById("mangaBox").run {
selectFirst("div.leftContent") ?: this
Expand Down Expand Up @@ -205,17 +207,21 @@ internal abstract class GroupleParser(
return context.newParserInstance(chapter.source).getPages(chapter)
}
val url = chapter.url.toAbsoluteUrl(domain).toHttpUrl().newBuilder().setQueryParameter("mtr", "1").build()
val doc = webClient.httpGet(url).checkAuthRequired().parseHtml()
val doc = webClient.httpGet(url).parseHtml()
val scripts = doc.select("script")
for (script in scripts) {
val data = script.html()
var pos = data.indexOf("rm_h.readerDoInit(")
if (pos != -1) {
parsePagesNew(data, pos)?.let { return it }
parsePagesV2(data, pos)?.let { return it }
}
pos = data.indexOf("rm_h.readerInit( 0,")
if (pos != -1) {
parsePagesOld(data, pos)?.let { return it }
parsePagesV1(data, pos)?.let { return it }
}
pos = data.indexOf(".readerInit(")
if (pos != -1) {
parsePagesV3(data, pos).let { return it }
}
}
doc.parseFailed("Pages list not found at ${chapter.url}")
Expand All @@ -224,7 +230,7 @@ internal abstract class GroupleParser(
override suspend fun getPageUrl(page: MangaPage): String {
val parts = page.url.split('|')
if (parts.size < 2) {
throw ParseException("No servers found for page", page.url)
return page.url
}
val path = parts.last()
// fast path
Expand Down Expand Up @@ -285,15 +291,15 @@ internal abstract class GroupleParser(
override fun intercept(chain: Interceptor.Chain): Response {
val request = chain.request()
if (!request.header(HEADER_ACCEPT).isNullOrEmpty()) {
return chain.proceed(request)
return chain.proceed(request).checkIfAuthRequired()
}
val ext = request.url.pathSegments.lastOrNull()?.substringAfterLast('.', "")?.lowercase(Locale.ROOT)
return if (ext == "jpg" || ext == "jpeg" || ext == "png" || ext == "webp") {
chain.proceed(
request.newBuilder().header(HEADER_ACCEPT, "image/webp,image/png;q=0.9,image/jpeg,*/*;q=0.8").build(),
)
} else {
chain.proceed(request)
chain.proceed(request).checkIfAuthRequired()
}
}

Expand All @@ -304,7 +310,7 @@ internal abstract class GroupleParser(
}

override suspend fun getRelatedManga(seed: Manga): List<Manga> {
val doc = webClient.httpGet(seed.url.toAbsoluteUrl(domain)).checkAuthRequired().parseHtml()
val doc = webClient.httpGet(seed.url.toAbsoluteUrl(domain)).parseHtml()
val root = doc.body().requireElementById("mangaBox").select("h4").first { it.ownText() == RELATED_TITLE }
.nextElementSibling() ?: doc.parseFailed("Cannot find root")
return root.select("div.tile").mapNotNull(::parseManga)
Expand Down Expand Up @@ -386,15 +392,6 @@ internal abstract class GroupleParser(
}
}.getOrDefault(false)

/**
 * Returns this response unless the last path segment of its request URL is `login`.
 *
 * In that case the response is closed quietly and an [AuthRequiredException] is thrown.
 * A URL without path segments is passed through untouched.
 */
private fun Response.checkAuthRequired(): Response {
    // lastOrNull() yields null for an empty segment list, which never equals "login"
    val endsAtLogin = request.url.pathSegments.lastOrNull() == "login"
    if (endsAtLogin) {
        closeQuietly()
        throw AuthRequiredException(source)
    }
    return this
}

private fun Response.isPumpkin(): Boolean = request.url.host == "upload.wikimedia.org"

private fun parseManga(node: Element): Manga? {
Expand Down Expand Up @@ -440,14 +437,14 @@ internal abstract class GroupleParser(
)
}

private fun parsePagesNew(data: String, pos: Int): List<MangaPage>? {
private fun parsePagesV1(data: String, pos: Int): List<MangaPage>? {
val json = data.substring(pos).substringAfter('(').substringBefore('\n').substringBeforeLast(')')
if (json.isEmpty()) {
return null
}
val ja = JSONArray("[$json]")
val pages = ja.getJSONArray(0)
val servers = ja.getJSONArray(2).mapJSON { it.getString("path") }
val pages = ja.getJSONArray(1)
val servers = ja.getJSONArray(3).mapJSON { it.getString("path") }
val serversStr = servers.joinToString("|")
return (0 until pages.length()).map { i ->
val page = pages.getJSONArray(i)
Expand All @@ -462,14 +459,14 @@ internal abstract class GroupleParser(
}
}

private fun parsePagesOld(data: String, pos: Int): List<MangaPage>? {
private fun parsePagesV2(data: String, pos: Int): List<MangaPage>? {
val json = data.substring(pos).substringAfter('(').substringBefore('\n').substringBeforeLast(')')
if (json.isEmpty()) {
return null
}
val ja = JSONArray("[$json]")
val pages = ja.getJSONArray(1)
val servers = ja.getJSONArray(3).mapJSON { it.getString("path") }
val pages = ja.getJSONArray(0)
val servers = ja.getJSONArray(2).mapJSON { it.getString("path") }
val serversStr = servers.joinToString("|")
return (0 until pages.length()).map { i ->
val page = pages.getJSONArray(i)
Expand All @@ -484,6 +481,21 @@ internal abstract class GroupleParser(
}
}

/**
 * Builds the page list from a parenthesised argument list located at or after [pos] in [data].
 *
 * The extracted text, with everything after its last comma removed, is parsed as a JSON array.
 * Every entry is itself an array: element 0 is used as the server base and element 2 as the
 * image location. An empty server falls back to `https://$domain`.
 *
 * @param data text that contains the call to parse
 * @param pos offset in [data] from which the argument list is searched
 * @return one [MangaPage] per array entry, in array order
 */
private fun parsePagesV3(data: String, pos: Int): List<MangaPage> {
    // NOTE(review): presumably substringBetween returns the text enclosed by the two delimiters - confirm against the helper
    val rawArgs = data.substring(pos).substringBetween("(", ")")
    val entries = JSONArray(rawArgs.substringBeforeLast(','))
    return List(entries.length()) { index ->
        val entry = entries.getJSONArray(index)
        val host = entry.getString(0).ifEmpty { "https://$domain" }
        val path = entry.getString(2)
        MangaPage(
            id = generateUid(path),
            url = concatUrl(host, path),
            preview = null,
            source = source,
        )
    }
}

private suspend fun fetchTagsMap(): ScatterMap<String, String> {
val url = "https://$domain/search/advanced"
val properties =
Expand All @@ -510,4 +522,18 @@ internal abstract class GroupleParser(
.toString()
.toRelativeUrl(domain)
}

/**
 * Passes the response through unless it indicates that authorization is needed.
 *
 * Authorization is considered required when either
 * - the status code is 404 and [isAuthorized] is false, or
 * - the last path segment of the request URL is `login`.
 *
 * In both cases the response is closed quietly before throwing.
 *
 * @return this response when neither condition holds
 * @throws AuthRequiredException when one of the conditions above is met
 */
@Throws(IOException::class)
private fun Response.checkIfAuthRequired(): Response {
    // Short-circuit order matters: isAuthorized is only consulted for a 404,
    // and the URL is only inspected when the first check did not match.
    val authNeeded = (code == HttpURLConnection.HTTP_NOT_FOUND && !isAuthorized) ||
        request.url.pathSegments.lastOrNull() == "login"
    if (authNeeded) {
        closeQuietly()
        throw AuthRequiredException(source)
    }
    return this
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ internal class SeiMangaParser(
companion object {

val domains = arrayOf(
"1.seimanga.me",
"seimanga.me",
)
}
Expand Down

0 comments on commit 0422517

Please sign in to comment.