diff --git a/app/src/main/java/com/nononsenseapps/feeder/model/html/HtmlLinearizer.kt b/app/src/main/java/com/nononsenseapps/feeder/model/html/HtmlLinearizer.kt index c44e38dc4..d7737df44 100644 --- a/app/src/main/java/com/nononsenseapps/feeder/model/html/HtmlLinearizer.kt +++ b/app/src/main/java/com/nononsenseapps/feeder/model/html/HtmlLinearizer.kt @@ -376,36 +376,42 @@ class HtmlLinearizer { "figure" -> { finalizeAndAddCurrentElement(blockStyle) - // Wordpress likes nested figures to get images side by side - val imageCandidates = - element.descendantImageCandidates(baseUrl = baseUrl) - // Arstechnica has its own ideas about how to structure things - ?: element.ancestorImageCandidates(baseUrl = baseUrl) - - if (imageCandidates != null) { - val link = linearTextBuilder.findClosestLink()?.takeIf { it.isNotBlank() } - - val caption: LinearText? = - ListBuilderScope { - asElement(blockStyle = LinearTextBlockStyle.TEXT) { - linearizeChildren( - element.childNodes(), - blockStyle = it, - baseUrl = baseUrl, - ) - } - }.items.firstOrNull { - // Stuffing non-text inside a caption is not supported - it is LinearText && it.text.isNotBlank() - } as? LinearText + // Some sites put youtube iframes inside figures + val iframes = element.getElementsByTag("iframe") + if (iframes.isNotEmpty()) { + parseIframeVideo(iframes.first()) + } else { + // Wordpress likes nested figures to get images side by side + val imageCandidates = + element.descendantImageCandidates(baseUrl = baseUrl) + // Arstechnica has its own ideas about how to structure things + ?: element.ancestorImageCandidates(baseUrl = baseUrl) - add( - LinearImage( - sources = imageCandidates, - caption = caption, - link = link, - ), - ) + if (imageCandidates != null) { + val link = linearTextBuilder.findClosestLink()?.takeIf { it.isNotBlank() } + + val caption: LinearText? = + ListBuilderScope { + asElement(blockStyle = LinearTextBlockStyle.TEXT) { + linearizeChildren( + element.childNodes(), + blockStyle = it, + baseUrl = baseUrl, + ) + } + }.items.firstOrNull { + // Stuffing non-text inside a caption is not supported + it is LinearText && it.text.isNotBlank() + } as? LinearText + + add( + LinearImage( + sources = imageCandidates, + caption = caption, + link = link, + ), + ) + } } } @@ -580,23 +586,8 @@ class HtmlLinearizer { } "iframe" -> { - getVideo(element.attr("abs:src").ifBlank { null })?.let { video -> - add( - LinearVideo( - sources = - listOf( - LinearVideoSource( - uri = video.src, - link = video.link, - imageThumbnail = video.imageUrl, - widthPx = video.width, - heightPx = video.height, - mimeType = null, - ), - ), - ), - ) - } + finalizeAndAddCurrentElement(blockStyle) + parseIframeVideo(element) } "video" -> { @@ -638,6 +629,26 @@ class HtmlLinearizer { } } + private fun ListBuilderScope.parseIframeVideo(element: Element) { + getVideo(element.attr("abs:src").ifBlank { null })?.let { video -> + add( + LinearVideo( + sources = + listOf( + LinearVideoSource( + uri = video.src, + link = video.link, + imageThumbnail = video.imageUrl, + widthPx = video.width, + heightPx = video.height, + mimeType = null, + ), + ), + ), + ) + } + } + private fun append(c: String) { linearTextBuilder.append(c) } diff --git a/app/src/test/java/com/nononsenseapps/feeder/model/html/HtmlLinearizerTest.kt b/app/src/test/java/com/nononsenseapps/feeder/model/html/HtmlLinearizerTest.kt index fdbac7669..3c9917262 100644 --- a/app/src/test/java/com/nononsenseapps/feeder/model/html/HtmlLinearizerTest.kt +++ b/app/src/test/java/com/nononsenseapps/feeder/model/html/HtmlLinearizerTest.kt @@ -598,6 +598,40 @@ class HtmlLinearizerTest { ) } + @Test + fun `iframe inside figure with youtube video`() { + // Seen on AlltOmElbil.se + val html = + """ + +
+ +
+ + """.trimIndent() + val baseUrl = "https://example.com" + + val result = linearizer.linearize(html, baseUrl).elements + + assertEquals(1, result.size, "Expected one item: $result") + assertEquals( + LinearVideo( + sources = + listOf( + LinearVideoSource( + "https://www.youtube.com/embed/x_m02bUxfvE?feature=oembed", + "https://www.youtube.com/watch?v=x_m02bUxfvE", + "http://img.youtube.com/vi/x_m02bUxfvE/hqdefault.jpg", + 480, + 360, + null, + ), + ), + ), + result[0], + ) + } + @Test fun `table block 2x2`() { val html = "
12
34
"