Skip to content

Commit

Permalink
convert images to data URIs (effectively embedding them into the HTML)
Browse files Browse the repository at this point in the history
  • Loading branch information
Aivean committed Dec 21, 2023
1 parent db7f665 commit 587d23a
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 5 deletions.
33 changes: 28 additions & 5 deletions src/main/scala/com/aivean/royalroad/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@ package com.aivean.royalroad

import com.aivean.royalroad.Utils._
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
import net.ruippeixotog.scalascraper.browser.JsoupBrowser.JsoupElement
import net.ruippeixotog.scalascraper.dsl.DSL
import net.ruippeixotog.scalascraper.model.Document
import org.jsoup.Connection

import java.io.PrintWriter
import java.net.URLDecoder
import java.net.{URL, URLDecoder}
import java.util.concurrent.ArrayBlockingQueue
import scala.concurrent.duration.DurationInt
import scala.concurrent.{Await, Future, duration}
import scala.util.{Failure, Success, Try}

object Main extends App {

Expand Down Expand Up @@ -102,11 +104,32 @@ object Main extends App {
</h1>.toString()
)

// get chapter content
val chapterContent = (doc >?> element(cliArgs.bodyQuery()))
.getOrElse(parsingError("chapter text", cliArgs.bodyQuery(), url))

// find all image elements in the chapter content
val imageElements = chapterContent.select("img")

// replace all image elements with their data URI
imageElements.collect {
case img: JsoupElement =>
val imgUrl = img.attr("src")
println("embedding image: " + imgUrl)
Try(new URL(imgUrl)) match {
case Success(url) =>
Try(retry(getDataURIForURL(url))) match {
case Success(dataUrl) => img.underlying.attr("src", dataUrl.toString)
case Failure(e) =>
println(s"Failed to convert $imgUrl to data URL")
e.printStackTrace()
}
case Failure(_) => println(s"Invalid URL: $imgUrl")
}
}

// write chapter content to file
printWriter.write(
(doc >?> element(cliArgs.bodyQuery()))
.getOrElse(parsingError("chapter text", cliArgs.bodyQuery(), url)).outerHtml
)
printWriter.write(chapterContent.outerHtml)

printWriter.write("\n")
}
Expand Down
44 changes: 44 additions & 0 deletions src/main/scala/com/aivean/royalroad/Utils.scala
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package com.aivean.royalroad

import java.io.{BufferedInputStream, ByteArrayOutputStream, IOException}
import java.net._
import java.util.Base64
import java.util.concurrent.atomic.AtomicLong
import scala.util.{Failure, Success, Try}


/**
* Misc helper functions
*/
Expand Down Expand Up @@ -55,4 +59,44 @@ object Utils {
case Success(res) => res
case Failure(e) => if (times > 1) Utils.retry(f, times - 1) else throw e
}

/**
 * Loan pattern: acquires a resource, hands it to `block`, and always closes it afterwards.
 *
 * If `block` fails and `close()` fails as well, the failure from `block` is the one that
 * propagates; the failure from `close()` is attached to it as a suppressed exception
 * (the same contract as Java's try-with-resources) instead of hiding the root cause.
 *
 * @param resource by-name expression that acquires the resource; evaluated exactly once
 * @param block    computation that uses the resource
 * @tparam R       resource type
 * @tparam T       result type of `block`
 * @return whatever `block` returns
 */
def withResource[R <: AutoCloseable, T](resource: => R)(block: R => T): T = {
  // Acquired outside the try: if acquisition itself throws there is nothing to close.
  val res = resource
  var failure: Option[Throwable] = None
  try {
    block(res)
  } catch {
    case e: Throwable =>
      // Remember the primary failure so `finally` knows not to let close() mask it.
      failure = Some(e)
      throw e
  } finally {
    failure match {
      case None => res.close()
      case Some(primary) =>
        try res.close()
        catch {
          // addSuppressed rejects self-suppression, hence the identity guard.
          case scala.util.control.NonFatal(onClose) =>
            if (onClose ne primary) primary.addSuppressed(onClose)
        }
    }
  }
}

/**
 * Downloads the resource behind `url` and encodes it as a base64 `data:` URI.
 *
 * The content type is first sniffed from the leading bytes of the stream; if that yields
 * nothing, it is guessed from the file extension of the URL path.
 *
 * @param url location of the resource to embed
 * @return a URI of the form `data:<content-type>;base64,<payload>`
 * @throws IOException if the content type cannot be determined or the stream cannot be read
 */
def getDataURIForURL(url: URL): URI = withResource(url.openStream()) { is =>
  // BufferedInputStream supports mark/reset, which guessContentTypeFromStream relies on
  // to peek at the first bytes without consuming them.
  val bis = new BufferedInputStream(is)

  // Fallback: derive the type from the extension of the last path segment. Only the path
  // is inspected so that a query string, a fragment, or dots in the host name cannot be
  // mistaken for (or corrupt) the extension; matching is case-insensitive.
  def contentTypeFromExtension: Option[String] = {
    val path = Option(url.getPath).getOrElse("")
    val fileName = path.substring(path.lastIndexOf('/') + 1)
    val dot = fileName.lastIndexOf('.')
    val ext =
      if (dot >= 0) fileName.substring(dot + 1).toLowerCase(java.util.Locale.ROOT) else ""
    ext match {
      case "jpg" | "jpeg" => Some("image/jpeg")
      case "png" => Some("image/png")
      case "gif" => Some("image/gif")
      case "webp" => Some("image/webp")
      case "svg" => Some("image/svg+xml")
      case _ => None
    }
  }

  // guessContentTypeFromStream returns null when it does not recognise the signature.
  // Failing here happens before the payload is read.
  val contentType = Option(URLConnection.guessContentTypeFromStream(bis))
    .orElse(contentTypeFromExtension)
    .getOrElse(throw new IOException("could not get content type from " + url.toExternalForm))

  // ByteArrayOutputStream holds no external resource, so it needs no explicit close.
  val os = new ByteArrayOutputStream()
  val chunk = new Array[Byte](4096)
  Iterator.continually(bis.read(chunk))
    .takeWhile(_ != -1) // read() signals end of stream with -1
    .foreach(readBytes => os.write(chunk, 0, readBytes))

  new URI(s"data:$contentType;base64," + Base64.getEncoder.encodeToString(os.toByteArray))
}

}
17 changes: 17 additions & 0 deletions src/test/scala/com/aivean/royalroad/UtilsTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.aivean.royalroad

import org.scalatest.FunSuite

import java.net.{URI, URL}

/**
 * Tests for [[Utils]].
 */
class UtilsTest extends FunSuite {

  // test url to data uri conversion
  // Uses a local temp file instead of a remote asset so the test is deterministic and
  // does not need network access.
  test("urlToDataUri") {
    // The bare GIF signature is enough for content-type sniffing to report image/gif.
    val payload = "GIF89a".getBytes(java.nio.charset.StandardCharsets.US_ASCII)
    val file = java.nio.file.Files.createTempFile("urlToDataUri", ".gif")
    try {
      java.nio.file.Files.write(file, payload)
      val url: URL = file.toUri.toURL
      val dataUri = Utils.getDataURIForURL(url)
      // "R0lGODlh" is the base64 encoding of the six bytes "GIF89a".
      assert(dataUri == new URI("data:image/gif;base64,R0lGODlh"))
    } finally {
      java.nio.file.Files.deleteIfExists(file)
    }
  }
}

0 comments on commit 587d23a

Please sign in to comment.