Skip to content

Commit

Permalink
Added support for iframes inside figures
Browse files: browse the repository at this point in the history
  • Loading branch information
spacecowboy committed Jun 4, 2024
1 parent 7694230 commit 4a96932
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -376,36 +376,42 @@ class HtmlLinearizer {
"figure" -> {
finalizeAndAddCurrentElement(blockStyle)

// Wordpress likes nested figures to get images side by side
val imageCandidates =
element.descendantImageCandidates(baseUrl = baseUrl)
// Arstechnica has its own ideas about how to structure things
?: element.ancestorImageCandidates(baseUrl = baseUrl)

if (imageCandidates != null) {
val link = linearTextBuilder.findClosestLink()?.takeIf { it.isNotBlank() }

val caption: LinearText? =
ListBuilderScope {
asElement(blockStyle = LinearTextBlockStyle.TEXT) {
linearizeChildren(
element.childNodes(),
blockStyle = it,
baseUrl = baseUrl,
)
}
}.items.firstOrNull {
// Stuffing non-text inside a caption is not supported
it is LinearText && it.text.isNotBlank()
} as? LinearText
// Some sites put youtube iframes inside figures
val iframes = element.getElementsByTag("iframe")
if (iframes.isNotEmpty()) {
parseIframeVideo(iframes.first())
} else {
// Wordpress likes nested figures to get images side by side
val imageCandidates =
element.descendantImageCandidates(baseUrl = baseUrl)
// Arstechnica has its own ideas about how to structure things
?: element.ancestorImageCandidates(baseUrl = baseUrl)

add(
LinearImage(
sources = imageCandidates,
caption = caption,
link = link,
),
)
if (imageCandidates != null) {
val link = linearTextBuilder.findClosestLink()?.takeIf { it.isNotBlank() }

val caption: LinearText? =
ListBuilderScope {
asElement(blockStyle = LinearTextBlockStyle.TEXT) {
linearizeChildren(
element.childNodes(),
blockStyle = it,
baseUrl = baseUrl,
)
}
}.items.firstOrNull {
// Stuffing non-text inside a caption is not supported
it is LinearText && it.text.isNotBlank()
} as? LinearText

add(
LinearImage(
sources = imageCandidates,
caption = caption,
link = link,
),
)
}
}
}

Expand Down Expand Up @@ -580,23 +586,8 @@ class HtmlLinearizer {
}

"iframe" -> {
getVideo(element.attr("abs:src").ifBlank { null })?.let { video ->
add(
LinearVideo(
sources =
listOf(
LinearVideoSource(
uri = video.src,
link = video.link,
imageThumbnail = video.imageUrl,
widthPx = video.width,
heightPx = video.height,
mimeType = null,
),
),
),
)
}
finalizeAndAddCurrentElement(blockStyle)
parseIframeVideo(element)
}

"video" -> {
Expand Down Expand Up @@ -638,6 +629,26 @@ class HtmlLinearizer {
}
}

/**
 * Adds a [LinearVideo] for the given iframe [element] when [getVideo] recognizes its source.
 *
 * The absolute `src` attribute of the iframe is looked up; a blank value is passed to
 * [getVideo] as `null`. If no video is returned, nothing is added to the list.
 *
 * @param element the iframe element whose `abs:src` attribute is inspected
 */
private fun ListBuilderScope<LinearElement>.parseIframeVideo(element: Element) {
    val absoluteSrc = element.attr("abs:src").ifBlank { null }
    // Nothing to add when the source is not resolved to a video
    val video = getVideo(absoluteSrc) ?: return

    val videoSource =
        LinearVideoSource(
            uri = video.src,
            link = video.link,
            imageThumbnail = video.imageUrl,
            widthPx = video.width,
            heightPx = video.height,
            mimeType = null,
        )

    add(LinearVideo(sources = listOf(videoSource)))
}

private fun append(c: String) {
linearTextBuilder.append(c)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,40 @@ class HtmlLinearizerTest {
)
}

/**
 * A figure wrapping a YouTube iframe must linearize to exactly one [LinearVideo]
 * carrying the embed URL, the watch link and the thumbnail with its dimensions.
 */
@Test
fun `iframe inside figure with youtube video`() {
    // Arrange: markup as seen on AlltOmElbil.se
    val baseUrl = "https://example.com"
    val html =
        """
        <html><body>
        <figure class="wp-block-embed is-type-video is-provider-youtube wp-block-embed-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio"><div class="wp-block-embed__wrapper">
        <iframe title="Därför är el-lastbilar bättre än diesel-lastbilar" width="1170" height="658" src="https://www.youtube.com/embed/x_m02bUxfvE?feature=oembed" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen=""></iframe>
        </div></figure>
        </body></html>
        """.trimIndent()
    val expectedSource =
        LinearVideoSource(
            "https://www.youtube.com/embed/x_m02bUxfvE?feature=oembed",
            "https://www.youtube.com/watch?v=x_m02bUxfvE",
            "http://img.youtube.com/vi/x_m02bUxfvE/hqdefault.jpg",
            480,
            360,
            null,
        )
    val expected = LinearVideo(sources = listOf(expectedSource))

    // Act
    val result = linearizer.linearize(html, baseUrl).elements

    // Assert: the figure yields the video and nothing else
    assertEquals(1, result.size, "Expected one item: $result")
    assertEquals(expected, result[0])
}

@Test
fun `table block 2x2`() {
val html = "<html><body><table><tr><th>1</th><td>2</td></tr><tr><td>3</td><th>4</th></tr></table></body></html>"
Expand Down

0 comments on commit 4a96932

Please sign in to comment.